From a3ea760ea59a8253058c80240a9f0f2aa1fbc3c0 Mon Sep 17 00:00:00 2001 From: Julien Denize <40604584+juliendenize@users.noreply.github.com> Date: Wed, 11 Mar 2026 16:45:34 +0100 Subject: [PATCH] Add 'none' reasoning effort to ChatCompletionRequest (#36238) Signed-off-by: Julien Denize --- vllm/entrypoints/openai/chat_completion/protocol.py | 9 ++++++++- vllm/entrypoints/openai/chat_completion/serving.py | 4 +++- vllm/entrypoints/serve/render/serving.py | 3 +++ 3 files changed, 14 insertions(+), 2 deletions(-) diff --git a/vllm/entrypoints/openai/chat_completion/protocol.py b/vllm/entrypoints/openai/chat_completion/protocol.py index 4e4077b31..a6fef7868 100644 --- a/vllm/entrypoints/openai/chat_completion/protocol.py +++ b/vllm/entrypoints/openai/chat_completion/protocol.py @@ -179,7 +179,7 @@ class ChatCompletionRequest(OpenAIBaseModel): | ChatCompletionNamedToolChoiceParam | None ) = "none" - reasoning_effort: Literal["low", "medium", "high"] | None = None + reasoning_effort: Literal["none", "low", "medium", "high"] | None = None include_reasoning: bool = True parallel_tool_calls: bool | None = True @@ -778,3 +778,10 @@ class ChatCompletionRequest(OpenAIBaseModel): ) return data + + @model_validator(mode="before") + @classmethod + def set_include_reasoning_for_none_effort(cls, data: Any) -> Any: + if isinstance(data, dict) and data.get("reasoning_effort") == "none": + data["include_reasoning"] = False + return data diff --git a/vllm/entrypoints/openai/chat_completion/serving.py b/vllm/entrypoints/openai/chat_completion/serving.py index eb39e649a..2181586b4 100644 --- a/vllm/entrypoints/openai/chat_completion/serving.py +++ b/vllm/entrypoints/openai/chat_completion/serving.py @@ -1893,8 +1893,10 @@ class OpenAIServingChat(OpenAIServing): # if the model supports it. TODO: Support browsing. 
assert not self.supports_browsing assert not self.supports_code_interpreter + if (reasoning_effort := request.reasoning_effort) == "none": + raise ValueError(f"Harmony does not support {reasoning_effort=}") sys_msg = get_system_message( - reasoning_effort=request.reasoning_effort, + reasoning_effort=reasoning_effort, browser_description=None, python_description=None, with_custom_tools=should_include_tools, diff --git a/vllm/entrypoints/serve/render/serving.py b/vllm/entrypoints/serve/render/serving.py index 3674de04c..7cc6abc7d 100644 --- a/vllm/entrypoints/serve/render/serving.py +++ b/vllm/entrypoints/serve/render/serving.py @@ -221,6 +221,9 @@ class OpenAIServingRender: # if the model supports it. TODO: Support browsing. assert not self.supports_browsing assert not self.supports_code_interpreter + reasoning_effort = request.reasoning_effort + if reasoning_effort == "none": + raise ValueError(f"Harmony does not support {reasoning_effort=}") sys_msg = get_system_message( reasoning_effort=request.reasoning_effort, browser_description=None,