feat(api): Eager chat template warmup to eliminate first-request latency (#30700)

Signed-off-by: Nathan Price <nathan@abridge.com>
This commit is contained in:
Nathan Price
2025-12-17 18:01:29 -06:00
committed by GitHub
parent e3fc374a9a
commit 05a83dc6ee
2 changed files with 52 additions and 0 deletions

View File

@@ -1082,6 +1082,9 @@ async def init_app_state(
if "generate" in supported_tasks
else None
)
# Warm up chat template processing to avoid first-request latency
if state.openai_serving_chat is not None:
await state.openai_serving_chat.warmup()
state.openai_serving_completion = (
OpenAIServingCompletion(
engine_client,