10 lines
325 B
Python
10 lines
325 B
Python
from .accel_engine import AccelInferenceEngine # noqa: F401
|
|
from .attention import ( # noqa: F401
|
|
Attention,
|
|
get_forward_context,
|
|
reset_forward_context,
|
|
set_forward_context,
|
|
)
|
|
from .gpt2_accel import GPT2AccelAttention, GPT2AccelModel # noqa: F401
|
|
from .kv_manager import KVCacheManager, Seq # noqa: F401
|