[Frontend] Separate OpenAI Batch Runner usage from API Server (#4851)

This commit is contained in:
Alex Wu
2024-05-16 11:42:41 -04:00
committed by GitHub
parent dbc0754ddf
commit 5e0391c040
2 changed files with 2 additions and 1 deletion

View File

@@ -101,7 +101,7 @@ async def main(args):
engine_args = AsyncEngineArgs.from_cli_args(args)
engine = AsyncLLMEngine.from_engine_args(
-        engine_args, usage_context=UsageContext.OPENAI_API_SERVER)
+        engine_args, usage_context=UsageContext.OPENAI_BATCH_RUNNER)
# When using single vLLM without engine_use_ray
model_config = await engine.get_model_config()