[Bug] Fix Failure in /v1/chat/completions/render for Multimodal Requests (https://github.com/vllm-project/vllm/issues/35665) (#35684)

2026-03-14 17:10:11 +03:00
parent 600a039f57
commit 4a718e770d
13 changed files with 560 additions and 168 deletions
--- a/vllm/entrypoints/openai/api_server.py
+++ b/vllm/entrypoints/openai/api_server.py
@@ -368,6 +368,7 @@ async def init_app_state(
        request_logger=request_logger,
        chat_template=resolved_chat_template,
        chat_template_content_format=args.chat_template_content_format,
+        default_chat_template_kwargs=args.default_chat_template_kwargs,
        trust_request_chat_template=args.trust_request_chat_template,
    )

@@ -457,6 +458,9 @@ async def init_render_app_state(

    state.openai_serving_models = model_registry

+    # Expose tokenization via the render handler (no engine required).
+    state.openai_serving_tokenization = state.openai_serving_render
+
    state.vllm_config = vllm_config
    # Disable stats logging — there is no engine to poll.
    state.log_stats = False