Introduce LLM class for offline inference (#115)

Woosuk Kwon
2023-05-21 17:04:18 -07:00
committed by GitHub
parent f746ced08d
commit 655a5e48df
9 changed files with 222 additions and 81 deletions

cacheflow/__init__.py

@@ -1,19 +1,15 @@
+from cacheflow.entrypoints.llm import LLM
 from cacheflow.outputs import RequestOutput
 from cacheflow.sampling_params import SamplingParams
-from cacheflow.server.arg_utils import (
-    add_server_arguments,
-    create_server_configs_from_args,
-    initialize_server_from_args,
-)
+from cacheflow.server.arg_utils import ServerArgs
 from cacheflow.server.llm_server import LLMServer
 from cacheflow.server.ray_utils import initialize_cluster
 
 __all__ = [
-    "RequestOutput",
+    "LLM",
     "SamplingParams",
+    "RequestOutput",
     "LLMServer",
-    "add_server_arguments",
-    "create_server_configs_from_args",
-    "initialize_server_from_args",
+    "ServerArgs",
     "initialize_cluster",
 ]
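
The hunk above exports the new LLM class at the package top level, which is the offline-inference entry point this commit introduces. A minimal sketch of how it is presumably used, based only on the names exported here; the model name, the constructor keyword, and the RequestOutput fields read below are illustrative assumptions, not confirmed by this hunk:

from cacheflow import LLM, SamplingParams

# A batch of prompts completed offline, with no server process running.
prompts = [
    "Hello, my name is",
    "The capital of France is",
]
sampling_params = SamplingParams(temperature=0.8, top_p=0.95)

# Model name and keyword arguments are assumptions for illustration.
llm = LLM(model="facebook/opt-125m")

# generate() is assumed to return one RequestOutput per prompt.
outputs = llm.generate(prompts, sampling_params)
for output in outputs:
    print(output.prompt, "->", output.outputs[0].text)

The same hunk also collapses the three argument helpers (add_server_arguments, create_server_configs_from_args, initialize_server_from_args) into the single ServerArgs export, so server configuration travels as one object rather than three free functions.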