Remove deprecated reasoning_content message field (#33402)

Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
2026-01-30 11:48:15 +00:00
parent 174f16700b
commit c5113f60f2
8 changed files with 26 additions and 55 deletions
--- a/tests/entrypoints/openai/tool_parsers/test_openai_tool_parser.py
+++ b/tests/entrypoints/openai/tool_parsers/test_openai_tool_parser.py
@@ -131,7 +131,7 @@ def extract_reasoning_and_calls(chunks: list) -> tuple[str, list[str], list[str]
    Extract accumulated reasoning text and tool call arguments
    from streaming chunks.
    """
-    reasoning_content: str = ""
+    reasoning: str = ""
    tool_calls: dict[int, dict[str, str]] = {}

    for chunk in chunks:
@@ -139,8 +139,8 @@ def extract_reasoning_and_calls(chunks: list) -> tuple[str, list[str], list[str]
        if not choice:
            continue

-        if hasattr(choice, "reasoning_content") and choice.reasoning_content:
-            reasoning_content += choice.reasoning_content
+        if hasattr(choice, "reasoning") and choice.reasoning:
+            reasoning += choice.reasoning

        for tc in getattr(choice, "tool_calls", []) or []:
            idx = getattr(tc, "index", 0)
@@ -156,7 +156,7 @@ def extract_reasoning_and_calls(chunks: list) -> tuple[str, list[str], list[str]
    function_names: list[str] = [v["name"] for _, v in sorted(tool_calls.items())]
    arguments: list[str] = [v["arguments"] for _, v in sorted(tool_calls.items())]

-    return reasoning_content, arguments, function_names
+    return reasoning, arguments, function_names


 # ==========================================================
--- a/tests/reasoning/utils.py
+++ b/tests/reasoning/utils.py
@@ -18,9 +18,6 @@ class StreamingReasoningReconstructor:
        assert delta.content is None or delta.reasoning is None, (
            "Both content and reasoning content are present in the delta message"
        )
-        assert delta.reasoning == delta.reasoning_content, (
-            "reasoning_content should be present for backwards compatibility"
-        )
        if delta.content is not None:
            if self.other_content is None:
                self.other_content = delta.content
--- a/vllm/entrypoints/chat_utils.py
+++ b/vllm/entrypoints/chat_utils.py
@@ -1437,7 +1437,7 @@ def _parse_chat_message_content(
 ) -> list[ConversationMessage]:
    role = message["role"]
    content = message.get("content")
-    reasoning = message.get("reasoning") or message.get("reasoning_content")
+    reasoning = message.get("reasoning")

    if content is None:
        content = []
@@ -1463,9 +1463,6 @@ def _parse_chat_message_content(
            # Include reasoning if present for interleaved thinking.
            if reasoning is not None:
                result_msg["reasoning"] = cast(str, reasoning)
-                result_msg["reasoning_content"] = cast(
-                    str, reasoning
-                )  # keep compatibility
        elif role == "tool":
            parsed_msg = _ToolParser(message)
            if "tool_call_id" in parsed_msg:
--- a/vllm/entrypoints/openai/chat_completion/protocol.py
+++ b/vllm/entrypoints/openai/chat_completion/protocol.py
@@ -61,14 +61,6 @@ class ChatMessage(OpenAIBaseModel):

    # vLLM-specific fields that are not in OpenAI spec
    reasoning: str | None = None
-    reasoning_content: str | None = None
-    """Deprecated: use `reasoning` instead."""
-
-    @model_validator(mode="after")
-    def handle_deprecated_reasoning_content(self):
-        """Copy reasoning to reasoning_content for backward compatibility."""
-        self.reasoning_content = self.reasoning
-        return self


 class ChatCompletionLogProb(OpenAIBaseModel):
--- a/vllm/entrypoints/openai/chat_completion/serving.py
+++ b/vllm/entrypoints/openai/chat_completion/serving.py
@@ -1186,8 +1186,8 @@ class OpenAIServingChat(OpenAIServing):
                        delta_content_parts = []
                        if delta_message.content:
                            delta_content_parts.append(delta_message.content)
-                        if delta_message.reasoning_content:
-                            reasoning = delta_message.reasoning_content
+                        if delta_message.reasoning:
+                            reasoning = delta_message.reasoning
                            delta_content_parts.append(f"[reasoning: {reasoning}]")
                        if delta_message.tool_calls:
                            tool_args = "".join(
--- a/vllm/entrypoints/openai/engine/protocol.py
+++ b/vllm/entrypoints/openai/engine/protocol.py
@@ -261,16 +261,8 @@ class DeltaMessage(OpenAIBaseModel):
    role: str | None = None
    content: str | None = None
    reasoning: str | None = None
-    reasoning_content: str | None = None
-    """Deprecated: use `reasoning` instead."""
    tool_calls: list[DeltaToolCall] = Field(default_factory=list)

-    @model_validator(mode="after")
-    def handle_deprecated_reasoning_content(self):
-        """Copy reasoning to reasoning_content for backward compatibility."""
-        self.reasoning_content = self.reasoning
-        return self
-

 ####### Tokens IN <> Tokens OUT #######
 class GenerateRequest(BaseModel):
--- a/vllm/entrypoints/openai/parser/harmony_utils.py
+++ b/vllm/entrypoints/openai/parser/harmony_utils.py
@@ -321,13 +321,9 @@ def parse_chat_input_to_harmony_message(
            commentary_msg = commentary_msg.with_channel("commentary")
            msgs.append(commentary_msg)

-        reasoning_content = chat_msg.get("reasoning") or chat_msg.get(
-            "reasoning_content"
-        )
-        if reasoning_content:
-            analysis_msg = Message.from_role_and_content(
-                Role.ASSISTANT, reasoning_content
-            )
+        reasoning = chat_msg.get("reasoning")
+        if reasoning:
+            analysis_msg = Message.from_role_and_content(Role.ASSISTANT, reasoning)
            analysis_msg = analysis_msg.with_channel("analysis")
            msgs.append(analysis_msg)

@@ -362,9 +358,9 @@ def parse_chat_input_to_harmony_message(
        return [msg]

    # Non-tool reasoning content
-    reasoning_content = chat_msg.get("reasoning") or chat_msg.get("reasoning_content")
-    if role == "assistant" and reasoning_content:
-        analysis_msg = Message.from_role_and_content(Role.ASSISTANT, reasoning_content)
+    reasoning = chat_msg.get("reasoning")
+    if role == "assistant" and reasoning:
+        analysis_msg = Message.from_role_and_content(Role.ASSISTANT, reasoning)
        analysis_msg = analysis_msg.with_channel("analysis")
        msgs.append(analysis_msg)

@@ -545,7 +541,7 @@ def _parse_function_call(message: Message, recipient: str) -> list[ResponseOutpu
    return output_items


-def _parse_reasoning_content(message: Message) -> list[ResponseOutputItem]:
+def _parse_reasoning(message: Message) -> list[ResponseOutputItem]:
    """Parse reasoning/analysis content into reasoning items."""
    output_items = []
    for content in message.content:
@@ -646,7 +642,7 @@ def parse_output_message(message: Message) -> list[ResponseOutputItem]:
        elif recipient.startswith(("python", "browser", "container")):
            # Built-in tool recipients (python/browser/container)
            # generate reasoning output
-            output_items.extend(_parse_reasoning_content(message))
+            output_items.extend(_parse_reasoning(message))

        # All other recipients are MCP calls
        else:
@@ -654,12 +650,12 @@ def parse_output_message(message: Message) -> list[ResponseOutputItem]:

    # No recipient - handle based on channel for non-tool messages
    elif message.channel == "analysis":
-        output_items.extend(_parse_reasoning_content(message))
+        output_items.extend(_parse_reasoning(message))

    elif message.channel == "commentary":
        # Per Harmony format, commentary channel can contain preambles to calling
        # multiple functions - explanatory text with no recipient
-        output_items.extend(_parse_reasoning_content(message))
+        output_items.extend(_parse_reasoning(message))

    elif message.channel == "final":
        output_items.append(_parse_final_message(message))
--- a/vllm/tokenizers/deepseek_v32_encoding.py
+++ b/vllm/tokenizers/deepseek_v32_encoding.py
@@ -45,7 +45,7 @@ dsml_token: str = "｜DSML｜"
 system_msg_template: str = "{content}"
 user_msg_template: str = "<｜User｜>{content}<｜Assistant｜>"
 assistant_msg_template: str = "{reasoning}{content}{tool_calls}<｜end▁of▁sentence｜>"
-thinking_template = "{reasoning_content}"
+thinking_template = "{reasoning}"

 response_format_template: str = "## Response Format:\n\nYou MUST strictly adhere to the following schema to reply:\n{schema}"
 tool_call_template: str = (
@@ -170,7 +170,7 @@ def render_message(
    tools = msg.get("tools")
    response_format = msg.get("response_format")
    tool_calls = msg.get("tool_calls")
-    reasoning_content = msg.get("reasoning") or msg.get("reasoning_content")
+    reasoning = msg.get("reasoning")
    is_prefix = msg.get("prefix", False)

    if tools:
@@ -269,13 +269,12 @@ def render_message(
        summary_content = content or ""

        if thinking_mode == "thinking" and index > last_user_idx:
-            if not (reasoning_content or tool_calls):
+            if not (reasoning or tool_calls):
                raise ValueError(
-                    f"ThinkingMode: {thinking_mode}, invalid message without reasoning_content/tool_calls `{msg}` after last user message"
+                    f"ThinkingMode: {thinking_mode}, invalid message without reasoning/tool_calls `{msg}` after last user message"
                )
            thinking_part = (
-                thinking_template.format(reasoning_content=reasoning_content or "")
-                + thinking_end_token
+                thinking_template.format(reasoning=reasoning or "") + thinking_end_token
            )

        if not tool_calls and is_prefix:
@@ -307,7 +306,6 @@ def drop_thinking_messages(

        elif role == "assistant":
            msg_wo_thinking = copy.copy(msg)
-            msg_wo_thinking.pop("reasoning_content", None)
            msg_wo_thinking.pop("reasoning", None)
            messages_wo_thinking.append(msg_wo_thinking)

@@ -421,7 +419,7 @@ def parse_tool_calls(index: int, text: str):
 # formatted string and will not attempt to correct malformed output
 # that may be generated by the model.
 def parse_message_from_completion_text(text: str, thinking_mode: str):
-    summary_content, reasoning_content, tool_calls = "", "", []
+    summary_content, reasoning, tool_calls = "", "", []
    index, stop_token = 0, None
    tool_calls_start_token = f"\n\n<{dsml_token}function_calls"

@@ -431,7 +429,7 @@ def parse_message_from_completion_text(text: str, thinking_mode: str):
        index, content_delta, stop_token = _read_until_stop(
            index, text, [thinking_end_token, tool_calls_start_token]
        )
-        reasoning_content = content_delta
+        reasoning = content_delta
        if stop_token != thinking_end_token:
            raise RuntimeError("Invalid thinking format")

@@ -462,13 +460,12 @@ def parse_message_from_completion_text(text: str, thinking_mode: str):
        thinking_end_token,
        dsml_token,
    ]:
-        if sp_token in summary_content or sp_token in reasoning_content:
+        if sp_token in summary_content or sp_token in reasoning:
            raise RuntimeError("Unexpected special token in content")

    return {
        "role": "assistant",
        "content": summary_content,
-        "reasoning_content": reasoning_content,
-        "reasoning": reasoning_content,
+        "reasoning": reasoning,
        "tool_calls": tool_calls_to_openai_format(tool_calls),
    }