Remove deprecated reasoning_content message field (part 2) (#37480)

Signed-off-by: JartX <sagformas@epdcenter.es>
Signed-off-by: Ifta Khairul Alam Adil <ikaadil007@gmail.com>
Signed-off-by: Netanel Haber <58652339+netanel-haber@users.noreply.github.com>
Signed-off-by: yewentao256 <zhyanwentao@126.com>
Signed-off-by: Philip Ottesen <phiott256@gmail.com>
Signed-off-by: Woosuk Kwon <woosuk@inferact.ai>
Signed-off-by: Michael Goin <mgoin64@gmail.com>
Signed-off-by: Giancarlo Delfin <gdelfin@inferact.ai>
Signed-off-by: Andy Lo <andy@mistral.ai>
Signed-off-by: Thillai Chithambaram <thillaichithambaram.a@gmail.com>
Signed-off-by: sihao.li <sihao.li@intel.com>
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
Co-authored-by: JartX <sagformas@epdcenter.es>
Co-authored-by: Netanel Haber <58652339+netanel-haber@users.noreply.github.com>
Co-authored-by: Wentao Ye <44945378+yewentao256@users.noreply.github.com>
Co-authored-by: Philip Ottesen <phiott256@gmail.com>
Co-authored-by: Woosuk Kwon <woosuk.kwon@berkeley.edu>
Co-authored-by: Michael Goin <mgoin64@gmail.com>
Co-authored-by: Giancarlo Delfin <32987265+TheEpicDolphin@users.noreply.github.com>
Co-authored-by: Andy Lo <andy@mistral.ai>
Co-authored-by: Thillai Chithambaram <79466435+thillai-c@users.noreply.github.com>
Co-authored-by: sihao_li <165983188+1643661061leo@users.noreply.github.com>
Co-authored-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
2026-03-19 16:20:08 +01:00
parent 96266f119b
commit 104605cbf2
8 changed files with 36 additions and 38 deletions
--- a/docs/features/reasoning_outputs.md
+++ b/docs/features/reasoning_outputs.md
@@ -5,7 +5,7 @@ vLLM offers support for reasoning models like [DeepSeek R1](https://huggingface.
 Reasoning models return an additional `reasoning` field in their outputs, which contains the reasoning steps that led to the final conclusion. This field is not present in the outputs of other models.

 !!! warning
-    `reasoning` used to be called `reasoning_content`. For now, `reasoning_content` will continue to work. However, we encourage you to migrate to `reasoning` in case `reasoning_content` is removed in future.
+    `reasoning` used to be called `reasoning_content`. To migrate, directly replace `reasoning_content` with `reasoning`.

 ## Supported Models

--- a/tests/entrypoints/openai/chat_completion/test_serving_chat.py
+++ b/tests/entrypoints/openai/chat_completion/test_serving_chat.py
@@ -484,7 +484,7 @@ class TestGPTOSSSpeculativeChat:
        )

        content = ""
-        reasoning_content = ""
+        reasoning = ""
        async for chunk in stream:
            delta = chunk.choices[0].delta
            if delta.content:
@@ -492,9 +492,9 @@ class TestGPTOSSSpeculativeChat:

            chunk_reasoning = getattr(delta, "reasoning", None)
            if chunk_reasoning:
-                reasoning_content += delta.reasoning
+                reasoning += delta.reasoning

-        assert len(reasoning_content) > 0, "No reasoning was generated."
+        assert len(reasoning) > 0, "No reasoning was generated."
        assert content.strip() == "4"


--- a/tests/reasoning/test_step3p5_reasoning_parser.py
+++ b/tests/reasoning/test_step3p5_reasoning_parser.py
@@ -21,119 +21,119 @@ def step3p5_tokenizer():

 SIMPLE_REASONING = {
    "output": "This is a reasoning section</think>This is the rest",
-    "reasoning_content": "This is a reasoning section",
+    "reasoning": "This is a reasoning section",
    "content": "This is the rest",
    "is_reasoning_end": True,
 }
 # need to get into parser again to remove newline after </think>
 COMPLETE_REASONING = {
    "output": "This is a reasoning section</think>",
-    "reasoning_content": "This is a reasoning section",
+    "reasoning": "This is a reasoning section",
    "content": None,
    "is_reasoning_end": False,
 }
 NO_CONTENT = {
    "output": "This is content",
-    "reasoning_content": "This is content",
+    "reasoning": "This is content",
    "content": None,
    "is_reasoning_end": False,
 }
 NO_REASONING_STREAMING = {
    "output": "This is a reasoning section",
-    "reasoning_content": "This is a reasoning section",
+    "reasoning": "This is a reasoning section",
    "content": None,
    "is_reasoning_end": False,
 }
 MULTIPLE_LINES = {
    "output": "This\nThat</think>This is the rest\nThat",
-    "reasoning_content": "This\nThat",
+    "reasoning": "This\nThat",
    "content": "This is the rest\nThat",
    "is_reasoning_end": True,
 }
 SHORTEST_REASONING_NO_STREAMING = {
    "output": "</think>This is the rest",
-    "reasoning_content": None,
+    "reasoning": None,
    "content": "This is the rest",
    "is_reasoning_end": True,
 }
 SHORTEST_REASONING = {
    "output": "</think>This is the rest",
-    "reasoning_content": None,
+    "reasoning": None,
    "content": "This is the rest",
    "is_reasoning_end": True,
 }
 REASONING_WITH_THINK = {
    "output": "<think>This is a reasoning section</think>This is the rest",
-    "reasoning_content": "This is a reasoning section",
+    "reasoning": "This is a reasoning section",
    "content": "This is the rest",
    "is_reasoning_end": True,
 }
 COMPLETE_REASONING_WITH_THINK = {
    "output": "<think>This is a reasoning section</think>",
-    "reasoning_content": "This is a reasoning section",
+    "reasoning": "This is a reasoning section",
    "content": None,
    "is_reasoning_end": False,
 }
 MULTIPLE_LINES_WITH_THINK = {
    "output": "<think>This\nThat</think>This is the rest\nThat",
-    "reasoning_content": "This\nThat",
+    "reasoning": "This\nThat",
    "content": "This is the rest\nThat",
    "is_reasoning_end": True,
 }
 SHORTEST_REASONING_NO_STREAMING_WITH_THINK = {
    "output": "</think>This is the rest",
-    "reasoning_content": None,
+    "reasoning": None,
    "content": "This is the rest",
    "is_reasoning_end": True,
 }
 SHORTEST_REASONING_WITH_THINK = {
    "output": "</think>This is the rest",
-    "reasoning_content": None,
+    "reasoning": None,
    "content": "This is the rest",
    "is_reasoning_end": True,
 }
 THINK_NO_END = {
    "output": "<think>This is a reasoning section",
-    "reasoning_content": "This is a reasoning section",
+    "reasoning": "This is a reasoning section",
    "content": None,
    "is_reasoning_end": False,
 }
 EMPTY = {
    "output": "",
-    "reasoning_content": None,
+    "reasoning": None,
    "content": None,
    "is_reasoning_end": False,
 }
 EMPTY_STREAMING = {
    "output": "",
-    "reasoning_content": None,
+    "reasoning": None,
    "content": None,
    "is_reasoning_end": False,
 }
 NEW_LINE = {
    "output": "\n<think>This is a reasoning section</think>\nThis is the rest",
-    "reasoning_content": "This is a reasoning section",
+    "reasoning": "This is a reasoning section",
    "content": "This is the rest",
    "is_reasoning_end": True,
 }

 NEW_LINE_STREAMING = {
    "output": "\n<think>This is a reasoning section\n</think>\nThis is the rest",
-    "reasoning_content": "\nThis is a reasoning section",
+    "reasoning": "\nThis is a reasoning section",
    "content": "This is the rest",
    "is_reasoning_end": True,
 }

 NEW_LINE_STREAMING_COMPLEX_CONTENT = {
    "output": "\n This is a \n reasoning section\n\n\n</think>\n\nThis is the rest",
-    "reasoning_content": "\n This is a \n reasoning section\n\n",
+    "reasoning": "\n This is a \n reasoning section\n\n",
    "content": "\nThis is the rest",
    "is_reasoning_end": True,
 }

 MULTI_TURN_PROMPT_CONTENT = {
    "output": "<think> This is last turn's reasoning section </think> hello <think>",
-    "reasoning_content": "",
+    "reasoning": "",
    "content": "",
    "is_reasoning_end": False,
 }
@@ -296,7 +296,7 @@ def test_reasoning(
    print(f"content: {content}")
    test_id = request.node.callspec.id if hasattr(request.node, "callspec") else None
    if request.node.callspec.id != "multi_turn_prompt_content":
-        assert reasoning == param_dict["reasoning_content"]
+        assert reasoning == param_dict["reasoning"]
        assert content == param_dict["content"]

    # Test is_reasoning_end
--- a/vllm/entrypoints/openai/parser/responses_parser.py
+++ b/vllm/entrypoints/openai/parser/responses_parser.py
@@ -61,10 +61,10 @@ class ResponsesParser:
        # Store the finish_reason from the output
        self.finish_reason = output.finish_reason

-        reasoning_content, content = self.reasoning_parser_instance.extract_reasoning(
+        reasoning, content = self.reasoning_parser_instance.extract_reasoning(
            output.text, request=self.request
        )
-        if reasoning_content:
+        if reasoning:
            self.response_messages.append(
                ResponseReasoningItem(
                    type="reasoning",
@@ -73,7 +73,7 @@ class ResponsesParser:
                    content=[
                        Content(
                            type="reasoning_text",
-                            text=reasoning_content,
+                            text=reasoning,
                        )
                    ],
                )
--- a/vllm/entrypoints/openai/responses/utils.py
+++ b/vllm/entrypoints/openai/responses/utils.py
@@ -191,13 +191,13 @@ def _construct_single_message_from_response_item(
            ],
        )
    elif isinstance(item, ResponseReasoningItem):
-        reasoning_content = ""
+        reasoning = ""
        if item.encrypted_content:
            raise ValueError("Encrypted content is not supported.")
        elif item.content and len(item.content) >= 1:
-            reasoning_content = item.content[0].text
+            reasoning = item.content[0].text
        elif len(item.summary) >= 1:
-            reasoning_content = item.summary[0].text
+            reasoning = item.summary[0].text
            logger.warning(
                "Using summary text as reasoning content for item %s. "
                "Please use content instead of summary for "
@@ -206,7 +206,7 @@ def _construct_single_message_from_response_item(
            )
        return {
            "role": "assistant",
-            "reasoning": reasoning_content,
+            "reasoning": reasoning,
        }
    elif isinstance(item, ResponseOutputMessage):
        return {
--- a/vllm/parser/abstract_parser.py
+++ b/vllm/parser/abstract_parser.py
@@ -199,7 +199,7 @@ class Parser:
            request: The request object used to generate the output.

        Returns:
-            A tuple of (reasoning_content, response_content).
+            A tuple of (reasoning, response_content).
        """

    @abstractmethod
--- a/vllm/reasoning/nemotron_v3_reasoning_parser.py
+++ b/vllm/reasoning/nemotron_v3_reasoning_parser.py
@@ -17,9 +17,7 @@ class NemotronV3ReasoningParser(DeepSeekR1ReasoningParser):
    def extract_reasoning(
        self, model_output: str, request: ChatCompletionRequest | ResponsesRequest
    ) -> tuple[str | None, str | None]:
-        reasoning_content, final_content = super().extract_reasoning(
-            model_output, request
-        )
+        reasoning, final_content = super().extract_reasoning(model_output, request)
        chat_template_kwargs = getattr(request, "chat_template_kwargs", None)

        if (
@@ -30,6 +28,6 @@ class NemotronV3ReasoningParser(DeepSeekR1ReasoningParser):
            )
            and final_content is None
        ):
-            reasoning_content, final_content = final_content, reasoning_content
+            reasoning, final_content = final_content, reasoning

-        return reasoning_content, final_content
+        return reasoning, final_content
--- a/vllm/tool_parsers/step3p5_tool_parser.py
+++ b/vllm/tool_parsers/step3p5_tool_parser.py
@@ -295,7 +295,7 @@ class StreamingXMLToolCallParser:
                    final_delta = DeltaMessage(
                        role=None,
                        content=None,
-                        reasoning_content=None,
+                        reasoning=None,
                        tool_calls=[
                            DeltaToolCall(
                                index=self.tool_call_index - 1,