Start background task in AsyncLLMEngine.generate (#988)

Co-authored-by: Zhuohan Li <zhuohan123@gmail.com>
This commit is contained in:
Antoni Baum
2023-09-08 00:03:39 -07:00
committed by GitHub
parent 4b5bcf8906
commit 080438477f
4 changed files with 16 additions and 24 deletions

View File

@@ -192,9 +192,6 @@ async def create_chat_completion(request: ChatCompletionRequest,
"""
logger.info(f"Received chat completion request: {request}")
-if not engine.is_running:
-    engine.start_background_loop()
error_check_ret = await check_model(request)
if error_check_ret is not None:
return error_check_ret
@@ -367,9 +364,6 @@ async def create_completion(request: CompletionRequest, raw_request: Request):
"""
logger.info(f"Received completion request: {request}")
-if not engine.is_running:
-    engine.start_background_loop()
error_check_ret = await check_model(request)
if error_check_ret is not None:
return error_check_ret
@@ -627,8 +621,7 @@ if __name__ == "__main__":
served_model = args.model
engine_args = AsyncEngineArgs.from_cli_args(args)
-engine = AsyncLLMEngine.from_engine_args(engine_args,
-                                         start_engine_loop=False)
+engine = AsyncLLMEngine.from_engine_args(engine_args)
engine_model_config = asyncio.run(engine.get_model_config())
max_model_len = engine_model_config.get_max_model_len()