Initialize AsyncLLMEngine background loop correctly (#943)

This commit is contained in:
Antoni Baum
2023-09-04 17:41:22 -07:00
committed by GitHub
parent 002800f081
commit 1696725879
3 changed files with 25 additions and 7 deletions

View File

@@ -191,6 +191,9 @@ async def create_chat_completion(request: ChatCompletionRequest,
"""
logger.info(f"Received chat completion request: {request}")
if not engine.is_running:
engine.start_background_loop()
error_check_ret = await check_model(request)
if error_check_ret is not None:
return error_check_ret
@@ -363,6 +366,9 @@ async def create_completion(request: CompletionRequest, raw_request: Request):
"""
logger.info(f"Received completion request: {request}")
if not engine.is_running:
engine.start_background_loop()
error_check_ret = await check_model(request)
if error_check_ret is not None:
return error_check_ret
@@ -620,7 +626,8 @@ if __name__ == "__main__":
served_model = args.model
engine_args = AsyncEngineArgs.from_cli_args(args)
engine = AsyncLLMEngine.from_engine_args(engine_args)
engine = AsyncLLMEngine.from_engine_args(engine_args,
start_engine_loop=False)
engine_model_config = asyncio.run(engine.get_model_config())
max_model_len = engine_model_config.get_max_model_len()