Convert formatting to use ruff instead of yapf + isort (#26247)

Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
This commit is contained in:
Harry Mellor
2025-10-05 15:06:22 +01:00
committed by GitHub
parent 17edd8a807
commit d6953beb91
1508 changed files with 115244 additions and 94146 deletions

View File

@@ -7,8 +7,7 @@ from typing import Optional, Union
import regex as re
from transformers import PreTrainedTokenizerBase
from vllm.entrypoints.openai.protocol import (ChatCompletionRequest,
DeltaMessage)
from vllm.entrypoints.openai.protocol import ChatCompletionRequest, DeltaMessage
from vllm.logger import init_logger
from vllm.reasoning import ReasoningParser, ReasoningParserManager
@@ -22,16 +21,16 @@ class HunyuanA13BReasoningParser(ReasoningParser):
HunyuanReasoningParser
This class implements a reasoning parser specifically designed
for the Hunyuan A13B Model. It is responsible for parsing and
extracting structured reasoning and answer segments from model
This class implements a reasoning parser specifically designed
for the Hunyuan A13B Model. It is responsible for parsing and
extracting structured reasoning and answer segments from model
outputs that follow a specific pattern.
Key Features:
- For non-stream output , Recognizes and extracts reasoning ("think")
and answer ("answer") sections from text using regular expressions.
- For stream process, it requires a token id sequences to change the
reasoning state and other state so it maintains internal state to
reasoning state and other state so it maintains internal state to
manage parsing across multiple token.
@@ -50,20 +49,19 @@ class HunyuanA13BReasoningParser(ReasoningParser):
self.full_match_reasoning_regex = re.compile(
rf"(?:{self.think_start_expr}(.*?){self.response_start_expr})?(.*?){self.response_end_expr}",
re.DOTALL)
re.DOTALL,
)
self.half_match_reasoning_regex = re.compile(
rf"{self.think_start_expr}(.*?){self.response_start_expr}(.*)",
re.DOTALL)
rf"{self.think_start_expr}(.*?){self.response_start_expr}(.*)", re.DOTALL
)
self.think_start_ids = [14023, 771, 397]
self.think_start_ids_fast = [14023, 771, 1363]
self.response_start_ids = [198, 524, 27963, 397, 27, 9399, 397]
self.response_start_ids_fast = [524, 27963, 397, 27, 9399, 397]
self.response_end_ids = [198, 524, 9399, 29]
self.fast_think_ids = [
14023, 771, 1363, 524, 27963, 397, 27, 9399, 397
]
self.fast_think_ids = [14023, 771, 1363, 524, 27963, 397, 27, 9399, 397]
# when state change, send out all the buffered text in last state
self.buffered_text = []
@@ -91,7 +89,7 @@ class HunyuanA13BReasoningParser(ReasoningParser):
return []
def extract_reasoning_content(
self, model_output: str, request: ChatCompletionRequest
self, model_output: str, request: ChatCompletionRequest
) -> tuple[Optional[str], Optional[str]]:
"""Extract the reasoning content & content sections, respectively.
If the sequence doesn't match what we expect, i.e., the model generates
@@ -121,8 +119,7 @@ class HunyuanA13BReasoningParser(ReasoningParser):
reasoning_content, response_content = fallback_match[0]
if response_content.endswith(self.response_end_expr):
response_content = response_content[:-len(self.
response_end_expr)]
response_content = response_content[: -len(self.response_end_expr)]
if len(reasoning_content) == 0:
reasoning_content = None
@@ -133,8 +130,9 @@ class HunyuanA13BReasoningParser(ReasoningParser):
return None, model_output
def _is_strict_increasing_subsequence(self, subsequence: Sequence[int],
sequence: Sequence[int]) -> bool:
def _is_strict_increasing_subsequence(
self, subsequence: Sequence[int], sequence: Sequence[int]
) -> bool:
if not subsequence:
return False
@@ -159,27 +157,27 @@ class HunyuanA13BReasoningParser(ReasoningParser):
response_start_sequence = self.response_start_ids
response_end_sequence = self.response_end_ids
assert (len(delta_token_ids) == 1)
assert len(delta_token_ids) == 1
# Process each token in the delta
token = delta_token_ids[0]
def check_token_with_sequence(token):
if self.current_state == "idle" or self.current_state == "think":
return (token == self.expected_sequence[self.sequence_index]
or token == \
self.expected_sequence_side[self.sequence_index])
return (
token == self.expected_sequence[self.sequence_index]
or token == self.expected_sequence_side[self.sequence_index]
)
else:
return token == self.expected_sequence[self.sequence_index]
def check_last_token(token):
if self.current_state == "idle" or self.current_state == "think":
# only return true if it's judge using a side sequence.
if (self.sequence_index - 1 < len(self.expected_sequence_side)
and token
== self.expected_sequence_side[self.sequence_index -
1]):
return self.sequence_index == len(
self.expected_sequence_side)
if (
self.sequence_index - 1 < len(self.expected_sequence_side)
and token == self.expected_sequence_side[self.sequence_index - 1]
):
return self.sequence_index == len(self.expected_sequence_side)
else:
return self.sequence_index == len(self.expected_sequence)
else:
@@ -227,19 +225,19 @@ class HunyuanA13BReasoningParser(ReasoningParser):
# Return content based on current state
if self.current_state == "think":
return DeltaMessage(reasoning_content=buffered_content,
content=None)
return DeltaMessage(
reasoning_content=buffered_content, content=None
)
else:
return DeltaMessage(reasoning_content=None,
content=buffered_content)
return DeltaMessage(
reasoning_content=None, content=buffered_content
)
else:
# No buffered content, send normally
if self.current_state == "think":
return DeltaMessage(reasoning_content=delta_text,
content=None)
return DeltaMessage(reasoning_content=delta_text, content=None)
else:
return DeltaMessage(reasoning_content=None,
content=delta_text)
return DeltaMessage(reasoning_content=None, content=delta_text)
# If no content to send in this delta
return None