Rename servers and change port numbers to reduce confusion (#149)

This commit is contained in:
Zhuohan Li
2023-06-17 00:13:02 +08:00
committed by GitHub
parent 311490a720
commit eedb46bf03
10 changed files with 41 additions and 37 deletions

View File

@@ -6,7 +6,7 @@ from tqdm import tqdm
from cacheflow.outputs import RequestOutput
from cacheflow.sampling_params import SamplingParams
from cacheflow.server.arg_utils import ServerArgs
from cacheflow.server.llm_server import LLMServer
from cacheflow.server.llm_server import LLMEngine
from cacheflow.utils import Counter
@@ -20,7 +20,7 @@ class LLM:
mechanism and efficient memory management.
NOTE: This class is intended to be used for offline inference. For online
serving, use the `AsyncLLMServer` class instead.
serving, use the `AsyncLLMEngine` class instead.
NOTE: For the comprehensive list of arguments, see `ServerArgs`.
Args:
@@ -52,7 +52,7 @@ class LLM:
seed=seed,
**kwargs,
)
self.llm_server = LLMServer.from_server_args(server_args)
self.llm_server = LLMEngine.from_server_args(server_args)
self.request_counter = Counter()
def get_tokenizer(