Introduce LLM class for offline inference (#115)

Woosuk Kwon
2023-05-21 17:04:18 -07:00
committed by GitHub
parent f746ced08d
commit 655a5e48df
9 changed files with 222 additions and 81 deletions
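The commit title names a new in-process entry point, so a model can be queried directly rather than through the FastAPI frontend. A minimal usage sketch, assuming the LLM class is importable from the cacheflow package and exposes a generate() method — neither is visible in the hunks below; only SamplingParams and its import path appear in the diff:

from cacheflow import LLM  # import path assumed; the class name comes from the commit title
from cacheflow.sampling_params import SamplingParams

# Hypothetical offline-inference flow: construct the engine once, then run
# prompts through it in-process, with no HTTP server involved.
llm = LLM(model="facebook/opt-125m")  # model name and keyword are illustrative
sampling_params = SamplingParams(temperature=0.8, top_p=0.95)  # parameter names assumed
outputs = llm.generate(["Hello, my name is"], sampling_params)
for output in outputs:
    print(output)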

@@ -12,8 +12,7 @@ import uvicorn
 from cacheflow.outputs import RequestOutput
 from cacheflow.sampling_params import SamplingParams
-from cacheflow.server.arg_utils import (
-    add_server_arguments, create_server_configs_from_args)
+from cacheflow.server.arg_utils import ServerArgs
 from cacheflow.server.llm_server import LLMServer
 from cacheflow.server.ray_utils import initialize_cluster
@@ -116,10 +115,10 @@ if __name__ == "__main__":
     parser = argparse.ArgumentParser()
     parser.add_argument("--host", type=str, default="localhost")
     parser.add_argument("--port", type=int, default=10002)
-    parser = add_server_arguments(parser)
+    parser = ServerArgs.add_cli_args(parser)
     args = parser.parse_args()
-    server_configs = create_server_configs_from_args(args)
+    server_configs = ServerArgs.from_cli_args(args).create_server_configs()
     parallel_config = server_configs[2]
     distributed_init_method, stage_devices = initialize_cluster(parallel_config)
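
The hunk above replaces two free functions with a single ServerArgs object that owns argument registration, parsing, and config construction. A sketch of the shape this implies — the three method names are taken from the diff, while the fields, defaults, and internals are assumptions for illustration:

import argparse
from dataclasses import dataclass, fields

@dataclass
class ServerArgs:
    # Illustrative fields only; the real argument set is not visible in this diff.
    model: str = "facebook/opt-125m"
    tensor_parallel_size: int = 1

    @staticmethod
    def add_cli_args(parser: argparse.ArgumentParser) -> argparse.ArgumentParser:
        # Register one flag per field, replacing add_server_arguments().
        parser.add_argument("--model", type=str, default="facebook/opt-125m")
        parser.add_argument("--tensor-parallel-size", type=int, default=1)
        return parser

    @classmethod
    def from_cli_args(cls, args: argparse.Namespace) -> "ServerArgs":
        # Keep only the namespace attributes that match dataclass fields, so
        # unrelated flags like --host/--port pass through untouched.
        return cls(**{f.name: getattr(args, f.name) for f in fields(cls)})

    def create_server_configs(self):
        # Replaces create_server_configs_from_args(); per the hunk above, the
        # frontend indexes the returned tuple at position 2 for the parallel config.
        raise NotImplementedError

Keeping the parsing logic on one dataclass means the FastAPI frontend and the new offline LLM path can share a single argument definition instead of duplicating it per entry point.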