Introduce LLM class for offline inference (#115)
@@ -1,19 +1,15 @@
+from cacheflow.entrypoints.llm import LLM
 from cacheflow.outputs import RequestOutput
 from cacheflow.sampling_params import SamplingParams
-from cacheflow.server.arg_utils import (
-    add_server_arguments,
-    create_server_configs_from_args,
-    initialize_server_from_args,
-)
+from cacheflow.server.arg_utils import ServerArgs
 from cacheflow.server.llm_server import LLMServer
 from cacheflow.server.ray_utils import initialize_cluster
 
 __all__ = [
-    "RequestOutput",
+    "LLM",
     "SamplingParams",
+    "RequestOutput",
     "LLMServer",
-    "add_server_arguments",
-    "create_server_configs_from_args",
-    "initialize_server_from_args",
+    "ServerArgs",
     "initialize_cluster",
 ]
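
For context, a minimal sketch of how the newly exported offline-inference entrypoint is meant to be used. It relies only on the names exported above (LLM, SamplingParams, RequestOutput); the model name, the SamplingParams arguments, and the exact shape of generate() and RequestOutput are assumptions for illustration, not taken from this diff.

    from cacheflow import LLM, SamplingParams

    prompts = [
        "Hello, my name is",
        "The capital of France is",
    ]
    # Assumed sampling/constructor arguments; adjust to the actual API.
    sampling_params = SamplingParams(temperature=0.8, top_p=0.95)
    llm = LLM(model="facebook/opt-125m")

    # generate() is assumed to return one RequestOutput per prompt.
    outputs = llm.generate(prompts, sampling_params)
    for output in outputs:
        print(output.prompt, output.outputs[0].text)

Replacing the add_server_arguments / create_server_configs_from_args / initialize_server_from_args helpers with a single ServerArgs export suggests server configuration is being consolidated into one object that the LLM class can build a server from internally, rather than going through argparse helpers.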