diff --git a/tests/entrypoints/openai/tool_parsers/test_openai_tool_parser.py b/tests/entrypoints/openai/tool_parsers/test_openai_tool_parser.py index 7cb87fd13..cedec72fe 100644 --- a/tests/entrypoints/openai/tool_parsers/test_openai_tool_parser.py +++ b/tests/entrypoints/openai/tool_parsers/test_openai_tool_parser.py @@ -131,7 +131,7 @@ def extract_reasoning_and_calls(chunks: list) -> tuple[str, list[str], list[str] Extract accumulated reasoning text and tool call arguments from streaming chunks. """ - reasoning_content: str = "" + reasoning: str = "" tool_calls: dict[int, dict[str, str]] = {} for chunk in chunks: @@ -139,8 +139,8 @@ def extract_reasoning_and_calls(chunks: list) -> tuple[str, list[str], list[str] if not choice: continue - if hasattr(choice, "reasoning_content") and choice.reasoning_content: - reasoning_content += choice.reasoning_content + if hasattr(choice, "reasoning") and choice.reasoning: + reasoning += choice.reasoning for tc in getattr(choice, "tool_calls", []) or []: idx = getattr(tc, "index", 0) @@ -156,7 +156,7 @@ def extract_reasoning_and_calls(chunks: list) -> tuple[str, list[str], list[str] function_names: list[str] = [v["name"] for _, v in sorted(tool_calls.items())] arguments: list[str] = [v["arguments"] for _, v in sorted(tool_calls.items())] - return reasoning_content, arguments, function_names + return reasoning, arguments, function_names # ========================================================== diff --git a/tests/reasoning/utils.py b/tests/reasoning/utils.py index 39ba52bc7..cb42d5f0b 100644 --- a/tests/reasoning/utils.py +++ b/tests/reasoning/utils.py @@ -18,9 +18,6 @@ class StreamingReasoningReconstructor: assert delta.content is None or delta.reasoning is None, ( "Both content and reasoning content are present in the delta message" ) - assert delta.reasoning == delta.reasoning_content, ( - "reasoning_content should be present for backwards compatibility" - ) if delta.content is not None: if self.other_content is None: self.other_content = delta.content diff --git a/vllm/entrypoints/chat_utils.py b/vllm/entrypoints/chat_utils.py index 35dbed006..50b664bdb 100644 --- a/vllm/entrypoints/chat_utils.py +++ b/vllm/entrypoints/chat_utils.py @@ -1437,7 +1437,7 @@ def _parse_chat_message_content( ) -> list[ConversationMessage]: role = message["role"] content = message.get("content") - reasoning = message.get("reasoning") or message.get("reasoning_content") + reasoning = message.get("reasoning") if content is None: content = [] @@ -1463,9 +1463,6 @@ def _parse_chat_message_content( # Include reasoning if present for interleaved thinking. if reasoning is not None: result_msg["reasoning"] = cast(str, reasoning) - result_msg["reasoning_content"] = cast( - str, reasoning - ) # keep compatibility elif role == "tool": parsed_msg = _ToolParser(message) if "tool_call_id" in parsed_msg: diff --git a/vllm/entrypoints/openai/chat_completion/protocol.py b/vllm/entrypoints/openai/chat_completion/protocol.py index a76dc73d9..311e9b52e 100644 --- a/vllm/entrypoints/openai/chat_completion/protocol.py +++ b/vllm/entrypoints/openai/chat_completion/protocol.py @@ -61,14 +61,6 @@ class ChatMessage(OpenAIBaseModel): # vLLM-specific fields that are not in OpenAI spec reasoning: str | None = None - reasoning_content: str | None = None - """Deprecated: use `reasoning` instead.""" - - @model_validator(mode="after") - def handle_deprecated_reasoning_content(self): - """Copy reasoning to reasoning_content for backward compatibility.""" - self.reasoning_content = self.reasoning - return self class ChatCompletionLogProb(OpenAIBaseModel): diff --git a/vllm/entrypoints/openai/chat_completion/serving.py b/vllm/entrypoints/openai/chat_completion/serving.py index c5e6c5c6a..744dd95f2 100644 --- a/vllm/entrypoints/openai/chat_completion/serving.py +++ b/vllm/entrypoints/openai/chat_completion/serving.py @@ -1186,8 +1186,8 @@ class OpenAIServingChat(OpenAIServing): delta_content_parts = [] if delta_message.content: delta_content_parts.append(delta_message.content) - if delta_message.reasoning_content: - reasoning = delta_message.reasoning_content + if delta_message.reasoning: + reasoning = delta_message.reasoning delta_content_parts.append(f"[reasoning: {reasoning}]") if delta_message.tool_calls: tool_args = "".join( diff --git a/vllm/entrypoints/openai/engine/protocol.py b/vllm/entrypoints/openai/engine/protocol.py index e491f9399..9d8792f30 100644 --- a/vllm/entrypoints/openai/engine/protocol.py +++ b/vllm/entrypoints/openai/engine/protocol.py @@ -261,16 +261,8 @@ class DeltaMessage(OpenAIBaseModel): role: str | None = None content: str | None = None reasoning: str | None = None - reasoning_content: str | None = None - """Deprecated: use `reasoning` instead.""" tool_calls: list[DeltaToolCall] = Field(default_factory=list) - @model_validator(mode="after") - def handle_deprecated_reasoning_content(self): - """Copy reasoning to reasoning_content for backward compatibility.""" - self.reasoning_content = self.reasoning - return self - ####### Tokens IN <> Tokens OUT ####### class GenerateRequest(BaseModel): diff --git a/vllm/entrypoints/openai/parser/harmony_utils.py b/vllm/entrypoints/openai/parser/harmony_utils.py index e901fe6d7..58ba9fee4 100644 --- a/vllm/entrypoints/openai/parser/harmony_utils.py +++ b/vllm/entrypoints/openai/parser/harmony_utils.py @@ -321,13 +321,9 @@ def parse_chat_input_to_harmony_message( commentary_msg = commentary_msg.with_channel("commentary") msgs.append(commentary_msg) - reasoning_content = chat_msg.get("reasoning") or chat_msg.get( - "reasoning_content" - ) - if reasoning_content: - analysis_msg = Message.from_role_and_content( - Role.ASSISTANT, reasoning_content - ) + reasoning = chat_msg.get("reasoning") + if reasoning: + analysis_msg = Message.from_role_and_content(Role.ASSISTANT, reasoning) analysis_msg = analysis_msg.with_channel("analysis") msgs.append(analysis_msg) @@ -362,9 +358,9 @@ def parse_chat_input_to_harmony_message( return [msg] # Non-tool reasoning content - reasoning_content = chat_msg.get("reasoning") or chat_msg.get("reasoning_content") - if role == "assistant" and reasoning_content: - analysis_msg = Message.from_role_and_content(Role.ASSISTANT, reasoning_content) + reasoning = chat_msg.get("reasoning") + if role == "assistant" and reasoning: + analysis_msg = Message.from_role_and_content(Role.ASSISTANT, reasoning) analysis_msg = analysis_msg.with_channel("analysis") msgs.append(analysis_msg) @@ -545,7 +541,7 @@ def _parse_function_call(message: Message, recipient: str) -> list[ResponseOutpu return output_items -def _parse_reasoning_content(message: Message) -> list[ResponseOutputItem]: +def _parse_reasoning(message: Message) -> list[ResponseOutputItem]: """Parse reasoning/analysis content into reasoning items.""" output_items = [] for content in message.content: @@ -646,7 +642,7 @@ def parse_output_message(message: Message) -> list[ResponseOutputItem]: elif recipient.startswith(("python", "browser", "container")): # Built-in tool recipients (python/browser/container) # generate reasoning output - output_items.extend(_parse_reasoning_content(message)) + output_items.extend(_parse_reasoning(message)) # All other recipients are MCP calls else: @@ -654,12 +650,12 @@ def parse_output_message(message: Message) -> list[ResponseOutputItem]: # No recipient - handle based on channel for non-tool messages elif message.channel == "analysis": - output_items.extend(_parse_reasoning_content(message)) + output_items.extend(_parse_reasoning(message)) elif message.channel == "commentary": # Per Harmony format, commentary channel can contain preambles to calling # multiple functions - explanatory text with no recipient - output_items.extend(_parse_reasoning_content(message)) + output_items.extend(_parse_reasoning(message)) elif message.channel == "final": output_items.append(_parse_final_message(message)) diff --git a/vllm/tokenizers/deepseek_v32_encoding.py b/vllm/tokenizers/deepseek_v32_encoding.py index 6a077cbe7..249b53262 100644 --- a/vllm/tokenizers/deepseek_v32_encoding.py +++ b/vllm/tokenizers/deepseek_v32_encoding.py @@ -45,7 +45,7 @@ dsml_token: str = "|DSML|" system_msg_template: str = "{content}" user_msg_template: str = "<|User|>{content}<|Assistant|>" assistant_msg_template: str = "{reasoning}{content}{tool_calls}<|end▁of▁sentence|>" -thinking_template = "{reasoning_content}" +thinking_template = "{reasoning}" response_format_template: str = "## Response Format:\n\nYou MUST strictly adhere to the following schema to reply:\n{schema}" tool_call_template: str = ( @@ -170,7 +170,7 @@ def render_message( tools = msg.get("tools") response_format = msg.get("response_format") tool_calls = msg.get("tool_calls") - reasoning_content = msg.get("reasoning") or msg.get("reasoning_content") + reasoning = msg.get("reasoning") is_prefix = msg.get("prefix", False) if tools: @@ -269,13 +269,12 @@ def render_message( summary_content = content or "" if thinking_mode == "thinking" and index > last_user_idx: - if not (reasoning_content or tool_calls): + if not (reasoning or tool_calls): raise ValueError( - f"ThinkingMode: {thinking_mode}, invalid message without reasoning_content/tool_calls `{msg}` after last user message" + f"ThinkingMode: {thinking_mode}, invalid message without reasoning/tool_calls `{msg}` after last user message" ) thinking_part = ( - thinking_template.format(reasoning_content=reasoning_content or "") - + thinking_end_token + thinking_template.format(reasoning=reasoning or "") + thinking_end_token ) if not tool_calls and is_prefix: @@ -307,7 +306,6 @@ def drop_thinking_messages( elif role == "assistant": msg_wo_thinking = copy.copy(msg) - msg_wo_thinking.pop("reasoning_content", None) msg_wo_thinking.pop("reasoning", None) messages_wo_thinking.append(msg_wo_thinking) @@ -421,7 +419,7 @@ def parse_tool_calls(index: int, text: str): # formatted string and will not attempt to correct malformed output # that may be generated by the model. def parse_message_from_completion_text(text: str, thinking_mode: str): - summary_content, reasoning_content, tool_calls = "", "", [] + summary_content, reasoning, tool_calls = "", "", [] index, stop_token = 0, None tool_calls_start_token = f"\n\n<{dsml_token}function_calls" @@ -431,7 +429,7 @@ def parse_message_from_completion_text(text: str, thinking_mode: str): index, content_delta, stop_token = _read_until_stop( index, text, [thinking_end_token, tool_calls_start_token] ) - reasoning_content = content_delta + reasoning = content_delta if stop_token != thinking_end_token: raise RuntimeError("Invalid thinking format") @@ -462,13 +460,12 @@ def parse_message_from_completion_text(text: str, thinking_mode: str): thinking_end_token, dsml_token, ]: - if sp_token in summary_content or sp_token in reasoning_content: + if sp_token in summary_content or sp_token in reasoning: raise RuntimeError("Unexpected special token in content") return { "role": "assistant", "content": summary_content, - "reasoning_content": reasoning_content, - "reasoning": reasoning_content, + "reasoning": reasoning, "tool_calls": tool_calls_to_openai_format(tool_calls), }