Convert formatting to use ruff instead of yapf + isort (#26247)
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
This commit is contained in:
@@ -7,8 +7,7 @@ from typing import Optional, Union
|
||||
import regex as re
|
||||
from transformers import PreTrainedTokenizerBase
|
||||
|
||||
from vllm.entrypoints.openai.protocol import (ChatCompletionRequest,
|
||||
DeltaMessage)
|
||||
from vllm.entrypoints.openai.protocol import ChatCompletionRequest, DeltaMessage
|
||||
from vllm.logger import init_logger
|
||||
from vllm.reasoning import ReasoningParser, ReasoningParserManager
|
||||
|
||||
@@ -22,16 +21,16 @@ class HunyuanA13BReasoningParser(ReasoningParser):
|
||||
|
||||
HunyuanReasoningParser
|
||||
|
||||
This class implements a reasoning parser specifically designed
|
||||
for the Hunyuan A13B Model. It is responsible for parsing and
|
||||
extracting structured reasoning and answer segments from model
|
||||
This class implements a reasoning parser specifically designed
|
||||
for the Hunyuan A13B Model. It is responsible for parsing and
|
||||
extracting structured reasoning and answer segments from model
|
||||
outputs that follow a specific pattern.
|
||||
|
||||
Key Features:
|
||||
- For non-stream output , Recognizes and extracts reasoning ("think")
|
||||
and answer ("answer") sections from text using regular expressions.
|
||||
- For stream process, it requires a token id sequences to change the
|
||||
reasoning state and other state so it maintains internal state to
|
||||
reasoning state and other state so it maintains internal state to
|
||||
manage parsing across multiple token.
|
||||
|
||||
|
||||
@@ -50,20 +49,19 @@ class HunyuanA13BReasoningParser(ReasoningParser):
|
||||
|
||||
self.full_match_reasoning_regex = re.compile(
|
||||
rf"(?:{self.think_start_expr}(.*?){self.response_start_expr})?(.*?){self.response_end_expr}",
|
||||
re.DOTALL)
|
||||
re.DOTALL,
|
||||
)
|
||||
|
||||
self.half_match_reasoning_regex = re.compile(
|
||||
rf"{self.think_start_expr}(.*?){self.response_start_expr}(.*)",
|
||||
re.DOTALL)
|
||||
rf"{self.think_start_expr}(.*?){self.response_start_expr}(.*)", re.DOTALL
|
||||
)
|
||||
|
||||
self.think_start_ids = [14023, 771, 397]
|
||||
self.think_start_ids_fast = [14023, 771, 1363]
|
||||
self.response_start_ids = [198, 524, 27963, 397, 27, 9399, 397]
|
||||
self.response_start_ids_fast = [524, 27963, 397, 27, 9399, 397]
|
||||
self.response_end_ids = [198, 524, 9399, 29]
|
||||
self.fast_think_ids = [
|
||||
14023, 771, 1363, 524, 27963, 397, 27, 9399, 397
|
||||
]
|
||||
self.fast_think_ids = [14023, 771, 1363, 524, 27963, 397, 27, 9399, 397]
|
||||
|
||||
# when state change, send out all the buffered text in last state
|
||||
self.buffered_text = []
|
||||
@@ -91,7 +89,7 @@ class HunyuanA13BReasoningParser(ReasoningParser):
|
||||
return []
|
||||
|
||||
def extract_reasoning_content(
|
||||
self, model_output: str, request: ChatCompletionRequest
|
||||
self, model_output: str, request: ChatCompletionRequest
|
||||
) -> tuple[Optional[str], Optional[str]]:
|
||||
"""Extract the reasoning content & content sections, respectively.
|
||||
If the sequence doesn't match what we expect, i.e., the model generates
|
||||
@@ -121,8 +119,7 @@ class HunyuanA13BReasoningParser(ReasoningParser):
|
||||
reasoning_content, response_content = fallback_match[0]
|
||||
|
||||
if response_content.endswith(self.response_end_expr):
|
||||
response_content = response_content[:-len(self.
|
||||
response_end_expr)]
|
||||
response_content = response_content[: -len(self.response_end_expr)]
|
||||
|
||||
if len(reasoning_content) == 0:
|
||||
reasoning_content = None
|
||||
@@ -133,8 +130,9 @@ class HunyuanA13BReasoningParser(ReasoningParser):
|
||||
|
||||
return None, model_output
|
||||
|
||||
def _is_strict_increasing_subsequence(self, subsequence: Sequence[int],
|
||||
sequence: Sequence[int]) -> bool:
|
||||
def _is_strict_increasing_subsequence(
|
||||
self, subsequence: Sequence[int], sequence: Sequence[int]
|
||||
) -> bool:
|
||||
if not subsequence:
|
||||
return False
|
||||
|
||||
@@ -159,27 +157,27 @@ class HunyuanA13BReasoningParser(ReasoningParser):
|
||||
response_start_sequence = self.response_start_ids
|
||||
response_end_sequence = self.response_end_ids
|
||||
|
||||
assert (len(delta_token_ids) == 1)
|
||||
assert len(delta_token_ids) == 1
|
||||
# Process each token in the delta
|
||||
token = delta_token_ids[0]
|
||||
|
||||
def check_token_with_sequence(token):
|
||||
if self.current_state == "idle" or self.current_state == "think":
|
||||
return (token == self.expected_sequence[self.sequence_index]
|
||||
or token == \
|
||||
self.expected_sequence_side[self.sequence_index])
|
||||
return (
|
||||
token == self.expected_sequence[self.sequence_index]
|
||||
or token == self.expected_sequence_side[self.sequence_index]
|
||||
)
|
||||
else:
|
||||
return token == self.expected_sequence[self.sequence_index]
|
||||
|
||||
def check_last_token(token):
|
||||
if self.current_state == "idle" or self.current_state == "think":
|
||||
# only return true if it's judge using a side sequence.
|
||||
if (self.sequence_index - 1 < len(self.expected_sequence_side)
|
||||
and token
|
||||
== self.expected_sequence_side[self.sequence_index -
|
||||
1]):
|
||||
return self.sequence_index == len(
|
||||
self.expected_sequence_side)
|
||||
if (
|
||||
self.sequence_index - 1 < len(self.expected_sequence_side)
|
||||
and token == self.expected_sequence_side[self.sequence_index - 1]
|
||||
):
|
||||
return self.sequence_index == len(self.expected_sequence_side)
|
||||
else:
|
||||
return self.sequence_index == len(self.expected_sequence)
|
||||
else:
|
||||
@@ -227,19 +225,19 @@ class HunyuanA13BReasoningParser(ReasoningParser):
|
||||
|
||||
# Return content based on current state
|
||||
if self.current_state == "think":
|
||||
return DeltaMessage(reasoning_content=buffered_content,
|
||||
content=None)
|
||||
return DeltaMessage(
|
||||
reasoning_content=buffered_content, content=None
|
||||
)
|
||||
else:
|
||||
return DeltaMessage(reasoning_content=None,
|
||||
content=buffered_content)
|
||||
return DeltaMessage(
|
||||
reasoning_content=None, content=buffered_content
|
||||
)
|
||||
else:
|
||||
# No buffered content, send normally
|
||||
if self.current_state == "think":
|
||||
return DeltaMessage(reasoning_content=delta_text,
|
||||
content=None)
|
||||
return DeltaMessage(reasoning_content=delta_text, content=None)
|
||||
else:
|
||||
return DeltaMessage(reasoning_content=None,
|
||||
content=delta_text)
|
||||
return DeltaMessage(reasoning_content=None, content=delta_text)
|
||||
|
||||
# If no content to send in this delta
|
||||
return None
|
||||
|
||||
Reference in New Issue
Block a user