Introduce LLM class for offline inference (#115)
@@ -12,8 +12,7 @@ import uvicorn
 from cacheflow.outputs import RequestOutput
 from cacheflow.sampling_params import SamplingParams
-from cacheflow.server.arg_utils import (
-    add_server_arguments, create_server_configs_from_args)
+from cacheflow.server.arg_utils import ServerArgs
 from cacheflow.server.llm_server import LLMServer
 from cacheflow.server.ray_utils import initialize_cluster
 
 
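The import change above consolidates the two free functions (`add_server_arguments`, `create_server_configs_from_args`) behind a single `ServerArgs` class. Its definition is not part of this diff; the following is a minimal sketch of the shape implied by the call sites below, where every field name beyond what the diff shows is an assumption for illustration:

```python
# Hypothetical sketch of the ServerArgs pattern implied by this diff's
# call sites; field names and defaults here are assumptions.
import argparse
from dataclasses import dataclass, fields


@dataclass
class ServerArgs:
    model: str = "facebook/opt-125m"  # assumed field
    tensor_parallel_size: int = 1     # assumed field

    @classmethod
    def add_cli_args(
            cls, parser: argparse.ArgumentParser) -> argparse.ArgumentParser:
        # One --flag per dataclass field, so the CLI stays in sync with
        # the config object automatically.
        for f in fields(cls):
            parser.add_argument(f"--{f.name.replace('_', '-')}",
                                type=f.type, default=f.default)
        return parser

    @classmethod
    def from_cli_args(cls, args: argparse.Namespace) -> "ServerArgs":
        # Pick out only the namespace attributes that belong to this class,
        # so unrelated flags (--host, --port) can coexist on the parser.
        return cls(**{f.name: getattr(args, f.name) for f in fields(cls)})

    def create_server_configs(self):
        # Would build the per-subsystem configs; returned as an indexable
        # sequence, since the caller reads server_configs[2] below.
        ...
```

One payoff of this pattern, visible in the second hunk: the frontend no longer needs to know which individual arguments the server takes, only that `ServerArgs` can register and then reclaim them.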
@@ -116,10 +115,10 @@ if __name__ == "__main__":
     parser = argparse.ArgumentParser()
     parser.add_argument("--host", type=str, default="localhost")
     parser.add_argument("--port", type=int, default=10002)
-    parser = add_server_arguments(parser)
+    parser = ServerArgs.add_cli_args(parser)
     args = parser.parse_args()
 
-    server_configs = create_server_configs_from_args(args)
+    server_configs = ServerArgs.from_cli_args(args).create_server_configs()
     parallel_config = server_configs[2]
     distributed_init_method, stage_devices = initialize_cluster(parallel_config)
 
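Per the commit title, the headline addition is an `LLM` class for offline (non-server) inference; the hunks above only show the frontend catching up to the `ServerArgs` refactor. A minimal usage sketch, assuming the new class is importable from the top-level `cacheflow` package, and that `generate` takes prompts plus the `SamplingParams` imported above and returns `RequestOutput` objects:

```python
# Hedged sketch of offline inference with the new LLM class; the
# constructor and generate() signatures are assumptions, not shown
# in this diff.
from cacheflow import LLM, SamplingParams

prompts = [
    "Hello, my name is",
    "The capital of France is",
]
sampling_params = SamplingParams(temperature=0.8, top_p=0.95)

llm = LLM(model="facebook/opt-125m")  # model name assumed for illustration
outputs = llm.generate(prompts, sampling_params)

for output in outputs:
    # Each RequestOutput would carry the prompt and its completions.
    print(output.prompt, output.outputs[0].text)
```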