# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project

from collections.abc import Sequence

from vllm.entrypoints.openai.protocol import DeltaMessage
from vllm.reasoning.basic_parsers import BaseThinkingReasoningParser

class DeepSeekR1ReasoningParser(BaseThinkingReasoningParser):
    """Reasoning parser for the DeepSeek R1 model.

    The DeepSeek R1 model uses <think>...</think> tokens to denote reasoning
    text. This parser extracts the reasoning content from the model output.
    """

    @property
    def start_token(self) -> str:
        """The token that starts reasoning content."""
        return "<think>"

    @property
    def end_token(self) -> str:
        """The token that ends reasoning content."""
        return "</think>"

    def extract_reasoning_streaming(
        self,
        previous_text: str,
        current_text: str,
        delta_text: str,
        previous_token_ids: Sequence[int],
        current_token_ids: Sequence[int],
        delta_token_ids: Sequence[int],
    ) -> DeltaMessage | None:
        # Delegate the common start/end-token bookkeeping to the base parser.
        base_delta = super().extract_reasoning_streaming(
            previous_text,
            current_text,
            delta_text,
            previous_token_ids,
            current_token_ids,
            delta_token_ids,
        )

        # If the base parser produced nothing, or the explicit start token
        # has already been seen (in history or in this delta), the base
        # result is authoritative — return it unchanged.
        start_seen = (
            self.start_token_id in previous_token_ids
            or self.start_token_id in delta_token_ids
        )
        if base_delta is None or start_seen:
            return base_delta

        # From here on, no start token was ever emitted: treat the stream
        # as implicitly inside a reasoning section and classify the delta
        # by where (if anywhere) the end token appears.
        if self.end_token_id in delta_token_ids:
            # End token arrives inside this delta, possibly with trailing
            # tokens: split the delta into reasoning and regular content.
            split_at = delta_text.find(self.end_token)
            trailing = delta_text[split_at + len(self.end_token) :]
            return DeltaMessage(
                reasoning=delta_text[:split_at],
                # Emit None rather than an empty content string.
                content=trailing if trailing else None,
            )

        if self.end_token_id in previous_token_ids:
            # Reasoning already ended in an earlier delta; everything in
            # this delta is ordinary content.
            return DeltaMessage(content=delta_text)

        # No end token seen yet anywhere: still streaming reasoning.
        return DeltaMessage(reasoning=delta_text)
|