[Bug] Fix Failure in /v1/chat/completions/render for Multimodal Requests (https://github.com/vllm-project/vllm/issues/35665) (#35684)
This commit is contained in:
@@ -368,6 +368,7 @@ async def init_app_state(
|
||||
request_logger=request_logger,
|
||||
chat_template=resolved_chat_template,
|
||||
chat_template_content_format=args.chat_template_content_format,
|
||||
default_chat_template_kwargs=args.default_chat_template_kwargs,
|
||||
trust_request_chat_template=args.trust_request_chat_template,
|
||||
)
|
||||
|
||||
@@ -457,6 +458,9 @@ async def init_render_app_state(
|
||||
|
||||
state.openai_serving_models = model_registry
|
||||
|
||||
# Expose tokenization via the render handler (no engine required).
|
||||
state.openai_serving_tokenization = state.openai_serving_render
|
||||
|
||||
state.vllm_config = vllm_config
|
||||
# Disable stats logging — there is no engine to poll.
|
||||
state.log_stats = False
|
||||
|
||||
Reference in New Issue
Block a user