[Bugfix][Frontend] Eliminate regex based check in reasoning full generator (#14821)

Signed-off-by: Ce Gao <cegao@tensorchord.ai>
This commit is contained in:
Ce Gao
2025-03-28 19:20:35 +08:00
committed by GitHub
parent a10314c6b3
commit 3bbaacbe15
2 changed files with 89 additions and 18 deletions

View File

@@ -1,6 +1,5 @@
# SPDX-License-Identifier: Apache-2.0
import re
from collections.abc import Sequence
from typing import Optional, Union
@@ -32,9 +31,6 @@ class DeepSeekR1ReasoningParser(ReasoningParser):
def __init__(self, tokenizer: PreTrainedTokenizerBase):
super().__init__(tokenizer)
self.reasoning_regex = re.compile(
rf"{self.start_token}(.*?){self.end_token}", re.DOTALL)
if not self.model_tokenizer:
raise ValueError(
"The model tokenizer must be passed to the ReasoningParser "
@@ -143,23 +139,34 @@ class DeepSeekR1ReasoningParser(ReasoningParser):
def extract_reasoning_content(
self, model_output: str, request: ChatCompletionRequest
) -> tuple[Optional[str], Optional[str]]:
"""
Extract reasoning content from the model output.
For text <think>abc</think>xyz:
- 'abc' goes to reasoning_content
- 'xyz' goes to content
Returns:
tuple[Optional[str], Optional[str]]: reasoning content and content
"""
# Check if the start token is present in the model output, remove it
# if it is present.
model_output_parts = model_output.partition(self.start_token)
model_output = model_output_parts[2] if model_output_parts[
1] else model_output_parts[0]
# DeepSeek R1 doesn't generate <think> now.
# Thus we assume the reasoning content is always at the start.
# Ref https://huggingface.co/deepseek-ai/DeepSeek-R1/commit/8a58a132790c9935686eb97f042afa8013451c9f
if self.end_token not in model_output:
return model_output, None
else:
# Add a start token if it's missing to keep compatibility.
if self.start_token not in model_output:
model_output = f"{self.start_token}{model_output}"
# Use a regex to find the reasoning content
reasoning_content = self.reasoning_regex.findall(model_output)[0]
end_index = len(
f"{self.start_token}{reasoning_content}{self.end_token}")
final_output = model_output[end_index:]
if len(final_output) == 0:
return reasoning_content, None
return reasoning_content, final_output
reasoning_content, _, content = model_output.partition(
self.end_token)
# If the end token is not found, return the model output as is.
# It should not happen since we already checked for the presence
# of the end token.
# If generation stops right after end-of-think, return null content
final_content = content or None
return reasoning_content, final_content