Remove deprecated reasoning_content message field (part-2) (#37480)
Signed-off-by: JartX <sagformas@epdcenter.es> Signed-off-by: Ifta Khairul Alam Adil <ikaadil007@gmail.com> Signed-off-by: Netanel Haber <58652339+netanel-haber@users.noreply.github.com> Signed-off-by: yewentao256 <zhyanwentao@126.com> Signed-off-by: Philip Ottesen <phiott256@gmail.com> Signed-off-by: Woosuk Kwon <woosuk@inferact.ai> Signed-off-by: Michael Goin <mgoin64@gmail.com> Signed-off-by: Giancarlo Delfin <gdelfin@inferact.ai> Signed-off-by: Andy Lo <andy@mistral.ai> Signed-off-by: Thillai Chithambaram <thillaichithambaram.a@gmail.com> Signed-off-by: sihao.li <sihao.li@intel.com> Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> Co-authored-by: JartX <sagformas@epdcenter.es> Co-authored-by: Netanel Haber <58652339+netanel-haber@users.noreply.github.com> Co-authored-by: Wentao Ye <44945378+yewentao256@users.noreply.github.com> Co-authored-by: Philip Ottesen <phiott256@gmail.com> Co-authored-by: Woosuk Kwon <woosuk.kwon@berkeley.edu> Co-authored-by: Michael Goin <mgoin64@gmail.com> Co-authored-by: Giancarlo Delfin <32987265+TheEpicDolphin@users.noreply.github.com> Co-authored-by: Andy Lo <andy@mistral.ai> Co-authored-by: Thillai Chithambaram <79466435+thillai-c@users.noreply.github.com> Co-authored-by: sihao_li <165983188+1643661061leo@users.noreply.github.com> Co-authored-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
This commit is contained in:
committed by
GitHub
parent
96266f119b
commit
104605cbf2
@@ -5,7 +5,7 @@ vLLM offers support for reasoning models like [DeepSeek R1](https://huggingface.
|
||||
Reasoning models return an additional `reasoning` field in their outputs, which contains the reasoning steps that led to the final conclusion. This field is not present in the outputs of other models.
|
||||
|
||||
!!! warning
|
||||
`reasoning` used to be called `reasoning_content`. For now, `reasoning_content` will continue to work. However, we encourage you to migrate to `reasoning` in case `reasoning_content` is removed in future.
|
||||
`reasoning` used to be called `reasoning_content`. To migrate, directly replace `reasoning_content` with `reasoning`.
|
||||
|
||||
## Supported Models
|
||||
|
||||
|
||||
@@ -484,7 +484,7 @@ class TestGPTOSSSpeculativeChat:
|
||||
)
|
||||
|
||||
content = ""
|
||||
reasoning_content = ""
|
||||
reasoning = ""
|
||||
async for chunk in stream:
|
||||
delta = chunk.choices[0].delta
|
||||
if delta.content:
|
||||
@@ -492,9 +492,9 @@ class TestGPTOSSSpeculativeChat:
|
||||
|
||||
chunk_reasoning = getattr(delta, "reasoning", None)
|
||||
if chunk_reasoning:
|
||||
reasoning_content += delta.reasoning
|
||||
reasoning += delta.reasoning
|
||||
|
||||
assert len(reasoning_content) > 0, "No reasoning was generated."
|
||||
assert len(reasoning) > 0, "No reasoning was generated."
|
||||
assert content.strip() == "4"
|
||||
|
||||
|
||||
|
||||
@@ -21,119 +21,119 @@ def step3p5_tokenizer():
|
||||
|
||||
SIMPLE_REASONING = {
|
||||
"output": "This is a reasoning section</think>This is the rest",
|
||||
"reasoning_content": "This is a reasoning section",
|
||||
"reasoning": "This is a reasoning section",
|
||||
"content": "This is the rest",
|
||||
"is_reasoning_end": True,
|
||||
}
|
||||
# need to get into parser again to remove newline after </think>
|
||||
COMPLETE_REASONING = {
|
||||
"output": "This is a reasoning section</think>",
|
||||
"reasoning_content": "This is a reasoning section",
|
||||
"reasoning": "This is a reasoning section",
|
||||
"content": None,
|
||||
"is_reasoning_end": False,
|
||||
}
|
||||
NO_CONTENT = {
|
||||
"output": "This is content",
|
||||
"reasoning_content": "This is content",
|
||||
"reasoning": "This is content",
|
||||
"content": None,
|
||||
"is_reasoning_end": False,
|
||||
}
|
||||
NO_REASONING_STREAMING = {
|
||||
"output": "This is a reasoning section",
|
||||
"reasoning_content": "This is a reasoning section",
|
||||
"reasoning": "This is a reasoning section",
|
||||
"content": None,
|
||||
"is_reasoning_end": False,
|
||||
}
|
||||
MULTIPLE_LINES = {
|
||||
"output": "This\nThat</think>This is the rest\nThat",
|
||||
"reasoning_content": "This\nThat",
|
||||
"reasoning": "This\nThat",
|
||||
"content": "This is the rest\nThat",
|
||||
"is_reasoning_end": True,
|
||||
}
|
||||
SHORTEST_REASONING_NO_STREAMING = {
|
||||
"output": "</think>This is the rest",
|
||||
"reasoning_content": None,
|
||||
"reasoning": None,
|
||||
"content": "This is the rest",
|
||||
"is_reasoning_end": True,
|
||||
}
|
||||
SHORTEST_REASONING = {
|
||||
"output": "</think>This is the rest",
|
||||
"reasoning_content": None,
|
||||
"reasoning": None,
|
||||
"content": "This is the rest",
|
||||
"is_reasoning_end": True,
|
||||
}
|
||||
REASONING_WITH_THINK = {
|
||||
"output": "<think>This is a reasoning section</think>This is the rest",
|
||||
"reasoning_content": "This is a reasoning section",
|
||||
"reasoning": "This is a reasoning section",
|
||||
"content": "This is the rest",
|
||||
"is_reasoning_end": True,
|
||||
}
|
||||
COMPLETE_REASONING_WITH_THINK = {
|
||||
"output": "<think>This is a reasoning section</think>",
|
||||
"reasoning_content": "This is a reasoning section",
|
||||
"reasoning": "This is a reasoning section",
|
||||
"content": None,
|
||||
"is_reasoning_end": False,
|
||||
}
|
||||
MULTIPLE_LINES_WITH_THINK = {
|
||||
"output": "<think>This\nThat</think>This is the rest\nThat",
|
||||
"reasoning_content": "This\nThat",
|
||||
"reasoning": "This\nThat",
|
||||
"content": "This is the rest\nThat",
|
||||
"is_reasoning_end": True,
|
||||
}
|
||||
SHORTEST_REASONING_NO_STREAMING_WITH_THINK = {
|
||||
"output": "</think>This is the rest",
|
||||
"reasoning_content": None,
|
||||
"reasoning": None,
|
||||
"content": "This is the rest",
|
||||
"is_reasoning_end": True,
|
||||
}
|
||||
SHORTEST_REASONING_WITH_THINK = {
|
||||
"output": "</think>This is the rest",
|
||||
"reasoning_content": None,
|
||||
"reasoning": None,
|
||||
"content": "This is the rest",
|
||||
"is_reasoning_end": True,
|
||||
}
|
||||
THINK_NO_END = {
|
||||
"output": "<think>This is a reasoning section",
|
||||
"reasoning_content": "This is a reasoning section",
|
||||
"reasoning": "This is a reasoning section",
|
||||
"content": None,
|
||||
"is_reasoning_end": False,
|
||||
}
|
||||
EMPTY = {
|
||||
"output": "",
|
||||
"reasoning_content": None,
|
||||
"reasoning": None,
|
||||
"content": None,
|
||||
"is_reasoning_end": False,
|
||||
}
|
||||
EMPTY_STREAMING = {
|
||||
"output": "",
|
||||
"reasoning_content": None,
|
||||
"reasoning": None,
|
||||
"content": None,
|
||||
"is_reasoning_end": False,
|
||||
}
|
||||
NEW_LINE = {
|
||||
"output": "\n<think>This is a reasoning section</think>\nThis is the rest",
|
||||
"reasoning_content": "This is a reasoning section",
|
||||
"reasoning": "This is a reasoning section",
|
||||
"content": "This is the rest",
|
||||
"is_reasoning_end": True,
|
||||
}
|
||||
|
||||
NEW_LINE_STREAMING = {
|
||||
"output": "\n<think>This is a reasoning section\n</think>\nThis is the rest",
|
||||
"reasoning_content": "\nThis is a reasoning section",
|
||||
"reasoning": "\nThis is a reasoning section",
|
||||
"content": "This is the rest",
|
||||
"is_reasoning_end": True,
|
||||
}
|
||||
|
||||
NEW_LINE_STREAMING_COMPLEX_CONTENT = {
|
||||
"output": "\n This is a \n reasoning section\n\n\n</think>\n\nThis is the rest",
|
||||
"reasoning_content": "\n This is a \n reasoning section\n\n",
|
||||
"reasoning": "\n This is a \n reasoning section\n\n",
|
||||
"content": "\nThis is the rest",
|
||||
"is_reasoning_end": True,
|
||||
}
|
||||
|
||||
MULTI_TURN_PROMPT_CONTENT = {
|
||||
"output": "<think> This is last turn's reasoning section </think> hello <think>",
|
||||
"reasoning_content": "",
|
||||
"reasoning": "",
|
||||
"content": "",
|
||||
"is_reasoning_end": False,
|
||||
}
|
||||
@@ -296,7 +296,7 @@ def test_reasoning(
|
||||
print(f"content: {content}")
|
||||
test_id = request.node.callspec.id if hasattr(request.node, "callspec") else None
|
||||
if request.node.callspec.id != "multi_turn_prompt_content":
|
||||
assert reasoning == param_dict["reasoning_content"]
|
||||
assert reasoning == param_dict["reasoning"]
|
||||
assert content == param_dict["content"]
|
||||
|
||||
# Test is_reasoning_end
|
||||
|
||||
@@ -61,10 +61,10 @@ class ResponsesParser:
|
||||
# Store the finish_reason from the output
|
||||
self.finish_reason = output.finish_reason
|
||||
|
||||
reasoning_content, content = self.reasoning_parser_instance.extract_reasoning(
|
||||
reasoning, content = self.reasoning_parser_instance.extract_reasoning(
|
||||
output.text, request=self.request
|
||||
)
|
||||
if reasoning_content:
|
||||
if reasoning:
|
||||
self.response_messages.append(
|
||||
ResponseReasoningItem(
|
||||
type="reasoning",
|
||||
@@ -73,7 +73,7 @@ class ResponsesParser:
|
||||
content=[
|
||||
Content(
|
||||
type="reasoning_text",
|
||||
text=reasoning_content,
|
||||
text=reasoning,
|
||||
)
|
||||
],
|
||||
)
|
||||
|
||||
@@ -191,13 +191,13 @@ def _construct_single_message_from_response_item(
|
||||
],
|
||||
)
|
||||
elif isinstance(item, ResponseReasoningItem):
|
||||
reasoning_content = ""
|
||||
reasoning = ""
|
||||
if item.encrypted_content:
|
||||
raise ValueError("Encrypted content is not supported.")
|
||||
elif item.content and len(item.content) >= 1:
|
||||
reasoning_content = item.content[0].text
|
||||
reasoning = item.content[0].text
|
||||
elif len(item.summary) >= 1:
|
||||
reasoning_content = item.summary[0].text
|
||||
reasoning = item.summary[0].text
|
||||
logger.warning(
|
||||
"Using summary text as reasoning content for item %s. "
|
||||
"Please use content instead of summary for "
|
||||
@@ -206,7 +206,7 @@ def _construct_single_message_from_response_item(
|
||||
)
|
||||
return {
|
||||
"role": "assistant",
|
||||
"reasoning": reasoning_content,
|
||||
"reasoning": reasoning,
|
||||
}
|
||||
elif isinstance(item, ResponseOutputMessage):
|
||||
return {
|
||||
|
||||
@@ -199,7 +199,7 @@ class Parser:
|
||||
request: The request object used to generate the output.
|
||||
|
||||
Returns:
|
||||
A tuple of (reasoning_content, response_content).
|
||||
A tuple of (reasoning, response_content).
|
||||
"""
|
||||
|
||||
@abstractmethod
|
||||
|
||||
@@ -17,9 +17,7 @@ class NemotronV3ReasoningParser(DeepSeekR1ReasoningParser):
|
||||
def extract_reasoning(
|
||||
self, model_output: str, request: ChatCompletionRequest | ResponsesRequest
|
||||
) -> tuple[str | None, str | None]:
|
||||
reasoning_content, final_content = super().extract_reasoning(
|
||||
model_output, request
|
||||
)
|
||||
reasoning, final_content = super().extract_reasoning(model_output, request)
|
||||
chat_template_kwargs = getattr(request, "chat_template_kwargs", None)
|
||||
|
||||
if (
|
||||
@@ -30,6 +28,6 @@ class NemotronV3ReasoningParser(DeepSeekR1ReasoningParser):
|
||||
)
|
||||
and final_content is None
|
||||
):
|
||||
reasoning_content, final_content = final_content, reasoning_content
|
||||
reasoning, final_content = final_content, reasoning
|
||||
|
||||
return reasoning_content, final_content
|
||||
return reasoning, final_content
|
||||
|
||||
@@ -295,7 +295,7 @@ class StreamingXMLToolCallParser:
|
||||
final_delta = DeltaMessage(
|
||||
role=None,
|
||||
content=None,
|
||||
reasoning_content=None,
|
||||
reasoning=None,
|
||||
tool_calls=[
|
||||
DeltaToolCall(
|
||||
index=self.tool_call_index - 1,
|
||||
|
||||
Reference in New Issue
Block a user