[Frontend] Delegate tokenization serving preprocessing to OpenAIServingRender (#37266)

Signed-off-by: Sage Ahrac <sagiahrak@gmail.com>
This commit is contained in:
Sage
2026-03-17 13:22:54 +02:00
committed by GitHub
parent 4af9ed21cb
commit 00f8e0d211
5 changed files with 33 additions and 30 deletions

View File

@@ -46,6 +46,7 @@ from vllm.entrypoints.sagemaker.api_router import sagemaker_standards_bootstrap
from vllm.entrypoints.serve.elastic_ep.middleware import (
ScalingMiddleware,
)
from vllm.entrypoints.serve.render.serving import OpenAIServingRender
from vllm.entrypoints.serve.tokenize.serving import OpenAIServingTokenization
from vllm.entrypoints.utils import (
cli_env_setup,
@@ -365,9 +366,27 @@ async def init_app_state(
lora_modules=lora_modules,
)
await state.openai_serving_models.init_static_loras()
state.openai_serving_render = OpenAIServingRender(
model_config=engine_client.model_config,
renderer=engine_client.renderer,
io_processor=engine_client.io_processor,
model_registry=state.openai_serving_models.registry,
request_logger=request_logger,
chat_template=resolved_chat_template,
chat_template_content_format=args.chat_template_content_format,
trust_request_chat_template=args.trust_request_chat_template,
enable_auto_tools=args.enable_auto_tool_choice,
exclude_tools_when_tool_choice_none=args.exclude_tools_when_tool_choice_none,
tool_parser=args.tool_call_parser,
default_chat_template_kwargs=args.default_chat_template_kwargs,
log_error_stack=args.log_error_stack,
)
state.openai_serving_tokenization = OpenAIServingTokenization(
engine_client,
state.openai_serving_models,
state.openai_serving_render,
request_logger=request_logger,
chat_template=resolved_chat_template,
chat_template_content_format=args.chat_template_content_format,