diff --git a/vllm/entrypoints/openai/chat_completion/serving.py b/vllm/entrypoints/openai/chat_completion/serving.py index 4f1196281..eb39e649a 100644 --- a/vllm/entrypoints/openai/chat_completion/serving.py +++ b/vllm/entrypoints/openai/chat_completion/serving.py @@ -1463,17 +1463,7 @@ class OpenAIServingChat(OpenAIServing): tool_call_class = ( MistralToolCall if is_mistral_tokenizer(tokenizer) else ToolCall ) - if self.use_harmony: - # Harmony models already have parsed content and tool_calls - # through parse_chat_output. Respect its output directly. - message = ChatMessage( - role=role, - reasoning=reasoning, - content=content, - tool_calls=tool_calls if tool_calls else [], - ) - - elif (not self.enable_auto_tools or not self.tool_parser) and ( + if (not self.enable_auto_tools or not self.tool_parser) and ( not isinstance(request.tool_choice, ChatCompletionNamedToolChoiceParam) and request.tool_choice != "required" ):