Convert formatting to use ruff instead of yapf + isort (#26247)
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
This commit is contained in:
@@ -44,9 +44,7 @@ def test_tokenizer():
|
||||
# Add custom test tokens
|
||||
test_tokens = ["<test:think>", "</test:think>", "<alt:start>", "<alt:end>"]
|
||||
existing_tokens = set(tokenizer.get_vocab().keys())
|
||||
new_tokens = [
|
||||
token for token in test_tokens if token not in existing_tokens
|
||||
]
|
||||
new_tokens = [token for token in test_tokens if token not in existing_tokens]
|
||||
if new_tokens:
|
||||
tokenizer.add_tokens(new_tokens)
|
||||
return tokenizer
|
||||
@@ -54,8 +52,8 @@ def test_tokenizer():
|
||||
|
||||
class TestBaseThinkingReasoningParserInit:
|
||||
"""
|
||||
Test initialization and basic properties of
|
||||
BaseThinkingReasoningParser.
|
||||
Test initialization and basic properties of
|
||||
BaseThinkingReasoningParser.
|
||||
"""
|
||||
|
||||
def test_successful_initialization(self, test_tokenizer):
|
||||
@@ -76,7 +74,6 @@ class TestBaseThinkingReasoningParserInit:
|
||||
|
||||
# Create a parser with tokens not in vocabulary
|
||||
class MissingTokenParser(BaseThinkingReasoningParser):
|
||||
|
||||
@property
|
||||
def start_token(self) -> str:
|
||||
return "<missing:start>"
|
||||
@@ -85,15 +82,15 @@ class TestBaseThinkingReasoningParserInit:
|
||||
def end_token(self) -> str:
|
||||
return "<missing:end>"
|
||||
|
||||
with pytest.raises(RuntimeError,
|
||||
match="could not locate think start/end tokens"):
|
||||
with pytest.raises(
|
||||
RuntimeError, match="could not locate think start/end tokens"
|
||||
):
|
||||
MissingTokenParser(test_tokenizer)
|
||||
|
||||
def test_initialization_with_empty_tokens(self, test_tokenizer):
|
||||
"""Test that initialization fails with empty token strings."""
|
||||
|
||||
class EmptyTokenParser(BaseThinkingReasoningParser):
|
||||
|
||||
@property
|
||||
def start_token(self) -> str:
|
||||
return ""
|
||||
@@ -102,8 +99,9 @@ class TestBaseThinkingReasoningParserInit:
|
||||
def end_token(self) -> str:
|
||||
return ""
|
||||
|
||||
with pytest.raises(ValueError,
|
||||
match="start_token and end_token must be defined"):
|
||||
with pytest.raises(
|
||||
ValueError, match="start_token and end_token must be defined"
|
||||
):
|
||||
EmptyTokenParser(test_tokenizer)
|
||||
|
||||
|
||||
@@ -158,10 +156,8 @@ class TestBaseThinkingReasoningParserExtraction:
|
||||
parser = TestThinkingReasoningParser(test_tokenizer)
|
||||
request = ChatCompletionRequest(messages=[], model="test-model")
|
||||
|
||||
model_output = ("<test:think>This is reasoning"
|
||||
"</test:think>This is content")
|
||||
reasoning, content = parser.extract_reasoning_content(
|
||||
model_output, request)
|
||||
model_output = "<test:think>This is reasoning</test:think>This is content"
|
||||
reasoning, content = parser.extract_reasoning_content(model_output, request)
|
||||
|
||||
assert reasoning == "This is reasoning"
|
||||
assert content == "This is content"
|
||||
@@ -171,9 +167,8 @@ class TestBaseThinkingReasoningParserExtraction:
|
||||
parser = TestThinkingReasoningParser(test_tokenizer)
|
||||
request = ChatCompletionRequest(messages=[], model="test-model")
|
||||
|
||||
model_output = ("This is reasoning</test:think>This is content")
|
||||
reasoning, content = parser.extract_reasoning_content(
|
||||
model_output, request)
|
||||
model_output = "This is reasoning</test:think>This is content"
|
||||
reasoning, content = parser.extract_reasoning_content(model_output, request)
|
||||
|
||||
assert reasoning == "This is reasoning"
|
||||
assert content == "This is content"
|
||||
@@ -184,8 +179,7 @@ class TestBaseThinkingReasoningParserExtraction:
|
||||
request = ChatCompletionRequest(messages=[], model="test-model")
|
||||
|
||||
model_output = "This is just content"
|
||||
reasoning, content = parser.extract_reasoning_content(
|
||||
model_output, request)
|
||||
reasoning, content = parser.extract_reasoning_content(model_output, request)
|
||||
|
||||
assert reasoning == "This is just content"
|
||||
assert content is None
|
||||
@@ -196,8 +190,7 @@ class TestBaseThinkingReasoningParserExtraction:
|
||||
request = ChatCompletionRequest(messages=[], model="test-model")
|
||||
|
||||
model_output = ""
|
||||
reasoning, content = parser.extract_reasoning_content(
|
||||
model_output, request)
|
||||
reasoning, content = parser.extract_reasoning_content(model_output, request)
|
||||
|
||||
assert reasoning == ""
|
||||
assert content is None
|
||||
@@ -207,9 +200,8 @@ class TestBaseThinkingReasoningParserExtraction:
|
||||
parser = TestThinkingReasoningParser(test_tokenizer)
|
||||
request = ChatCompletionRequest(messages=[], model="test-model")
|
||||
|
||||
model_output = ("<test:think></test:think>")
|
||||
reasoning, content = parser.extract_reasoning_content(
|
||||
model_output, request)
|
||||
model_output = "<test:think></test:think>"
|
||||
reasoning, content = parser.extract_reasoning_content(model_output, request)
|
||||
|
||||
assert reasoning == ""
|
||||
assert content is None
|
||||
@@ -221,19 +213,24 @@ class TestBaseThinkingReasoningParserStreaming:
|
||||
@pytest.mark.parametrize("streaming", [True, False])
|
||||
def test_simple_reasoning_extraction(self, test_tokenizer, streaming):
|
||||
"""
|
||||
Test basic reasoning extraction in both
|
||||
streaming and non-streaming modes.
|
||||
Test basic reasoning extraction in both
|
||||
streaming and non-streaming modes.
|
||||
"""
|
||||
parser = TestThinkingReasoningParser(test_tokenizer)
|
||||
|
||||
model_output = [
|
||||
"<test:think>", "Some ", "reasoning ", "content", "</test:think>",
|
||||
"Final ", "answer"
|
||||
"<test:think>",
|
||||
"Some ",
|
||||
"reasoning ",
|
||||
"content",
|
||||
"</test:think>",
|
||||
"Final ",
|
||||
"answer",
|
||||
]
|
||||
|
||||
reasoning, content = run_reasoning_extraction(parser,
|
||||
model_output,
|
||||
streaming=streaming)
|
||||
reasoning, content = run_reasoning_extraction(
|
||||
parser, model_output, streaming=streaming
|
||||
)
|
||||
|
||||
assert reasoning == "Some reasoning content"
|
||||
assert content == "Final answer"
|
||||
@@ -252,9 +249,7 @@ class TestBaseThinkingReasoningParserStreaming:
|
||||
"answer",
|
||||
]
|
||||
|
||||
reasoning, content = run_reasoning_extraction(parser,
|
||||
deltas,
|
||||
streaming=True)
|
||||
reasoning, content = run_reasoning_extraction(parser, deltas, streaming=True)
|
||||
|
||||
assert reasoning == "Some reasoning content"
|
||||
assert content == "Final answer"
|
||||
@@ -271,9 +266,7 @@ class TestBaseThinkingReasoningParserStreaming:
|
||||
"Answer",
|
||||
]
|
||||
|
||||
reasoning, content = run_reasoning_extraction(parser,
|
||||
deltas,
|
||||
streaming=True)
|
||||
reasoning, content = run_reasoning_extraction(parser, deltas, streaming=True)
|
||||
|
||||
assert reasoning == "Some reasoning"
|
||||
assert content == "Answer"
|
||||
@@ -290,9 +283,7 @@ class TestBaseThinkingReasoningParserStreaming:
|
||||
"end",
|
||||
]
|
||||
|
||||
reasoning, content = run_reasoning_extraction(parser,
|
||||
deltas,
|
||||
streaming=True)
|
||||
reasoning, content = run_reasoning_extraction(parser, deltas, streaming=True)
|
||||
|
||||
assert reasoning == "Some reasoning without end"
|
||||
assert content is None
|
||||
@@ -309,9 +300,7 @@ class TestBaseThinkingReasoningParserStreaming:
|
||||
"Final",
|
||||
]
|
||||
|
||||
reasoning, content = run_reasoning_extraction(parser,
|
||||
deltas,
|
||||
streaming=True)
|
||||
reasoning, content = run_reasoning_extraction(parser, deltas, streaming=True)
|
||||
|
||||
assert reasoning == "Reasoning content"
|
||||
assert content == "Final"
|
||||
@@ -319,29 +308,27 @@ class TestBaseThinkingReasoningParserStreaming:
|
||||
|
||||
class TestBaseThinkingReasoningParserMultipleImplementations:
|
||||
"""
|
||||
Test that multiple implementations of
|
||||
BaseThinkingReasoningParser work correctly.
|
||||
Test that multiple implementations of
|
||||
BaseThinkingReasoningParser work correctly.
|
||||
"""
|
||||
|
||||
def test_different_token_implementations(self, test_tokenizer):
|
||||
"""
|
||||
Test that different implementations
|
||||
with different tokens work independently.
|
||||
Test that different implementations
|
||||
with different tokens work independently.
|
||||
"""
|
||||
parser1 = TestThinkingReasoningParser(test_tokenizer)
|
||||
parser2 = TestThinkingReasoningParserAlt(test_tokenizer)
|
||||
|
||||
# Test parser1
|
||||
model_output1 = ("Reasoning1</test:think>Content1")
|
||||
reasoning1, content1 = run_reasoning_extraction(
|
||||
parser1, [model_output1])
|
||||
model_output1 = "Reasoning1</test:think>Content1"
|
||||
reasoning1, content1 = run_reasoning_extraction(parser1, [model_output1])
|
||||
assert reasoning1 == "Reasoning1"
|
||||
assert content1 == "Content1"
|
||||
|
||||
# Test parser2
|
||||
model_output2 = "Reasoning2<alt:end>Content2"
|
||||
reasoning2, content2 = run_reasoning_extraction(
|
||||
parser2, [model_output2])
|
||||
reasoning2, content2 = run_reasoning_extraction(parser2, [model_output2])
|
||||
assert reasoning2 == "Reasoning2"
|
||||
assert content2 == "Content2"
|
||||
|
||||
@@ -359,7 +346,7 @@ class TestBaseThinkingReasoningParserEdgeCases:
|
||||
"""Test behavior with multiple end tokens."""
|
||||
parser = TestThinkingReasoningParser(test_tokenizer)
|
||||
|
||||
model_output = ("First</test:think>Middle</test:think>Last")
|
||||
model_output = "First</test:think>Middle</test:think>Last"
|
||||
reasoning, content = run_reasoning_extraction(parser, [model_output])
|
||||
|
||||
# Should stop at first end token
|
||||
@@ -370,8 +357,7 @@ class TestBaseThinkingReasoningParserEdgeCases:
|
||||
"""Test behavior with nested-like token patterns."""
|
||||
parser = TestThinkingReasoningParser(test_tokenizer)
|
||||
|
||||
model_output = ("<test:think>Outer"
|
||||
"<test:think>Inner</test:think>Content")
|
||||
model_output = "<test:think>Outer<test:think>Inner</test:think>Content"
|
||||
reasoning, content = run_reasoning_extraction(parser, [model_output])
|
||||
|
||||
# Should process normally, start from first start token
|
||||
@@ -382,11 +368,9 @@ class TestBaseThinkingReasoningParserEdgeCases:
|
||||
"""Test behavior with malformed token-like strings."""
|
||||
parser = TestThinkingReasoningParser(test_tokenizer)
|
||||
|
||||
model_output = ("<test:thinking>Not a real token"
|
||||
"</test:thinking>Content")
|
||||
model_output = "<test:thinking>Not a real token</test:thinking>Content"
|
||||
reasoning, content = run_reasoning_extraction(parser, [model_output])
|
||||
|
||||
# Should treat as regular content since tokens don't match exactly
|
||||
assert reasoning == ("<test:thinking>Not a real token"
|
||||
"</test:thinking>Content")
|
||||
assert reasoning == ("<test:thinking>Not a real token</test:thinking>Content")
|
||||
assert content is None
|
||||
|
||||
@@ -259,15 +259,15 @@ def test_reasoning(
|
||||
output = deepseek_r1_qwen_tokenizer.tokenize(param_dict["output"])
|
||||
# decode everything to tokens
|
||||
output_tokens: list[str] = [
|
||||
deepseek_r1_qwen_tokenizer.convert_tokens_to_string([token])
|
||||
for token in output
|
||||
deepseek_r1_qwen_tokenizer.convert_tokens_to_string([token]) for token in output
|
||||
]
|
||||
parser: ReasoningParser = ReasoningParserManager.get_reasoning_parser(
|
||||
parser_name)(deepseek_r1_qwen_tokenizer)
|
||||
parser: ReasoningParser = ReasoningParserManager.get_reasoning_parser(parser_name)(
|
||||
deepseek_r1_qwen_tokenizer
|
||||
)
|
||||
|
||||
reasoning, content = run_reasoning_extraction(parser,
|
||||
output_tokens,
|
||||
streaming=streaming)
|
||||
reasoning, content = run_reasoning_extraction(
|
||||
parser, output_tokens, streaming=streaming
|
||||
)
|
||||
|
||||
assert reasoning == param_dict["reasoning_content"]
|
||||
assert content == param_dict["content"]
|
||||
@@ -281,7 +281,8 @@ def test_reasoning(
|
||||
if param_dict["content"] is not None:
|
||||
content = parser.extract_content_ids(output_ids)
|
||||
assert content == deepseek_r1_qwen_tokenizer.convert_tokens_to_ids(
|
||||
deepseek_r1_qwen_tokenizer.tokenize(param_dict["content"]))
|
||||
deepseek_r1_qwen_tokenizer.tokenize(param_dict["content"])
|
||||
)
|
||||
else:
|
||||
content = parser.extract_content_ids(output)
|
||||
assert content == []
|
||||
|
||||
@@ -54,8 +54,7 @@ COMPLETE_REASONING = {
|
||||
"is_reasoning_end": True,
|
||||
}
|
||||
MULTILINE_REASONING = {
|
||||
"output":
|
||||
"<think>This is a reasoning\nsection</think>This is the rest\nThat",
|
||||
"output": "<think>This is a reasoning\nsection</think>This is the rest\nThat",
|
||||
"reasoning_content": "This is a reasoning\nsection",
|
||||
"content": "This is the rest\nThat",
|
||||
"is_reasoning_end": True,
|
||||
@@ -158,12 +157,12 @@ The capital of Chile is Santiago."""
|
||||
REASONING_END_TEST_CASES = [
|
||||
pytest.param(STILL_REASONING_PROMPT, False, id="still_reasoning"),
|
||||
pytest.param(DONE_REASONING_PROMPT, True, id="done_reasoning"),
|
||||
pytest.param(MULTI_TURN_STILL_REASONING_PROMPT,
|
||||
False,
|
||||
id="multi_turn_still_reasoning"),
|
||||
pytest.param(MULTI_TURN_DONE_REASONING_PROMPT,
|
||||
True,
|
||||
id="multi_turn_done_reasoning")
|
||||
pytest.param(
|
||||
MULTI_TURN_STILL_REASONING_PROMPT, False, id="multi_turn_still_reasoning"
|
||||
),
|
||||
pytest.param(
|
||||
MULTI_TURN_DONE_REASONING_PROMPT, True, id="multi_turn_done_reasoning"
|
||||
),
|
||||
]
|
||||
|
||||
|
||||
@@ -177,12 +176,13 @@ def test_reasoning(
|
||||
output_tokens: list[str] = [
|
||||
glm45_tokenizer.convert_tokens_to_string([token]) for token in output
|
||||
]
|
||||
parser: ReasoningParser = ReasoningParserManager.get_reasoning_parser(
|
||||
parser_name)(glm45_tokenizer)
|
||||
parser: ReasoningParser = ReasoningParserManager.get_reasoning_parser(parser_name)(
|
||||
glm45_tokenizer
|
||||
)
|
||||
|
||||
reasoning, content = run_reasoning_extraction(parser,
|
||||
output_tokens,
|
||||
streaming=streaming)
|
||||
reasoning, content = run_reasoning_extraction(
|
||||
parser, output_tokens, streaming=streaming
|
||||
)
|
||||
|
||||
assert reasoning == param_dict["reasoning_content"]
|
||||
assert content == param_dict["content"]
|
||||
@@ -193,10 +193,12 @@ def test_reasoning(
|
||||
|
||||
|
||||
@pytest.mark.parametrize("prompt, is_reasoning_end", REASONING_END_TEST_CASES)
|
||||
def test_is_reasoning_end_full_prompt(prompt: str, is_reasoning_end: bool,
|
||||
glm45_tokenizer):
|
||||
parser: ReasoningParser = ReasoningParserManager.get_reasoning_parser(
|
||||
parser_name)(glm45_tokenizer)
|
||||
def test_is_reasoning_end_full_prompt(
|
||||
prompt: str, is_reasoning_end: bool, glm45_tokenizer
|
||||
):
|
||||
parser: ReasoningParser = ReasoningParserManager.get_reasoning_parser(parser_name)(
|
||||
glm45_tokenizer
|
||||
)
|
||||
tokens = glm45_tokenizer.tokenize(prompt)
|
||||
token_ids = glm45_tokenizer.convert_tokens_to_ids(tokens)
|
||||
check_is_reasoning_end = parser.is_reasoning_end(token_ids)
|
||||
|
||||
@@ -11,8 +11,7 @@ START_REASONING = "Here is my thought process:"
|
||||
START_RESPONSE = "Here is my response:"
|
||||
|
||||
SIMPLE_REASONING = {
|
||||
"output":
|
||||
f"{START_REASONING}This is a reasoning section{START_RESPONSE}This is the rest", #noqa: E501
|
||||
"output": f"{START_REASONING}This is a reasoning section{START_RESPONSE}This is the rest", # noqa: E501
|
||||
"reasoning_content": "This is a reasoning section",
|
||||
"content": "This is the rest",
|
||||
}
|
||||
@@ -27,14 +26,12 @@ NO_REASONING = {
|
||||
"content": "This is content",
|
||||
}
|
||||
MULTIPLE_LINES = {
|
||||
"output":
|
||||
f"{START_REASONING}This\nThat{START_RESPONSE}This is the rest\nThat",
|
||||
"output": f"{START_REASONING}This\nThat{START_RESPONSE}This is the rest\nThat",
|
||||
"reasoning_content": "This\nThat",
|
||||
"content": "This is the rest\nThat",
|
||||
}
|
||||
REASONING_WITH_THINK = {
|
||||
"output":
|
||||
f"{START_REASONING}This is a reasoning section{START_RESPONSE}This is the rest", #noqa: E501
|
||||
"output": f"{START_REASONING}This is a reasoning section{START_RESPONSE}This is the rest", # noqa: E501
|
||||
"reasoning_content": "This is a reasoning section",
|
||||
"content": "This is the rest",
|
||||
}
|
||||
@@ -44,8 +41,7 @@ COMPLETE_REASONING_WITH_THINK = {
|
||||
"content": None,
|
||||
}
|
||||
MULTIPLE_LINES_WITH_THINK = {
|
||||
"output":
|
||||
f"{START_REASONING}This\nThat{START_RESPONSE}This is the rest\nThat",
|
||||
"output": f"{START_REASONING}This\nThat{START_RESPONSE}This is the rest\nThat",
|
||||
"reasoning_content": "This\nThat",
|
||||
"content": "This is the rest\nThat",
|
||||
}
|
||||
@@ -137,12 +133,13 @@ def test_reasoning(
|
||||
output_tokens: list[str] = [
|
||||
tokenizer.convert_tokens_to_string([token]) for token in output
|
||||
]
|
||||
parser: ReasoningParser = ReasoningParserManager.get_reasoning_parser(
|
||||
parser_name)(tokenizer)
|
||||
parser: ReasoningParser = ReasoningParserManager.get_reasoning_parser(parser_name)(
|
||||
tokenizer
|
||||
)
|
||||
|
||||
reasoning, content = run_reasoning_extraction(parser,
|
||||
output_tokens,
|
||||
streaming=streaming)
|
||||
reasoning, content = run_reasoning_extraction(
|
||||
parser, output_tokens, streaming=streaming
|
||||
)
|
||||
|
||||
assert reasoning == param_dict["reasoning_content"]
|
||||
assert content == param_dict["content"]
|
||||
@@ -229,18 +226,15 @@ STREAMING_9 = {
|
||||
## The Response is ongoing, and the delta mixes reasoning content / content
|
||||
STREAMING_10 = {
|
||||
"previous_text": "Here is my thought process: foo",
|
||||
"current_text":
|
||||
"Here is my thought process: foo bar Here is my response: baz",
|
||||
"current_text": "Here is my thought process: foo bar Here is my response: baz",
|
||||
"delta_text": " bar Here is my response: baz",
|
||||
"reasoning_content": " bar ",
|
||||
"content": " baz",
|
||||
}
|
||||
# The delta text starts a new substring that might be a response special seq
|
||||
STREAMING_11 = {
|
||||
"previous_text":
|
||||
"Here is my thought process: This is a reasoning section ",
|
||||
"current_text":
|
||||
"Here is my thought process: This is a reasoning section Here",
|
||||
"previous_text": "Here is my thought process: This is a reasoning section ",
|
||||
"current_text": "Here is my thought process: This is a reasoning section Here",
|
||||
"delta_text": "Here",
|
||||
"reasoning_content": None,
|
||||
"content": None,
|
||||
@@ -320,14 +314,17 @@ STREAMING_SUBCASES = [
|
||||
@pytest.mark.parametrize("param_dict", STREAMING_SUBCASES)
|
||||
def test_streaming_subcases(param_dict):
|
||||
# Get all of the token IDs
|
||||
previous_token_ids = tokenizer.encode(
|
||||
param_dict["previous_text"]
|
||||
) if param_dict["previous_text"] is not None else []
|
||||
previous_token_ids = (
|
||||
tokenizer.encode(param_dict["previous_text"])
|
||||
if param_dict["previous_text"] is not None
|
||||
else []
|
||||
)
|
||||
current_token_ids = tokenizer.encode(param_dict["current_text"])
|
||||
delta_token_ids = tokenizer.encode(param_dict["delta_text"])
|
||||
|
||||
parser: ReasoningParser = ReasoningParserManager.get_reasoning_parser(
|
||||
parser_name)(tokenizer)
|
||||
parser: ReasoningParser = ReasoningParserManager.get_reasoning_parser(parser_name)(
|
||||
tokenizer
|
||||
)
|
||||
|
||||
response = parser.extract_reasoning_content_streaming(
|
||||
previous_text=param_dict["previous_text"],
|
||||
@@ -339,8 +336,7 @@ def test_streaming_subcases(param_dict):
|
||||
)
|
||||
# Streaming currently expects at least one of reasoning content / content,
|
||||
# so the response should return None in that case.
|
||||
if param_dict["reasoning_content"] is None and param_dict[
|
||||
"content"] is None:
|
||||
if param_dict["reasoning_content"] is None and param_dict["content"] is None:
|
||||
assert response is None
|
||||
else:
|
||||
assert isinstance(response, DeltaMessage)
|
||||
|
||||
@@ -13,15 +13,13 @@ START_RESPONSE = "\n</think>\n<answer>\n"
|
||||
END_RESPONSE = "\n</answer>"
|
||||
|
||||
NO_REASONING_QUICK_THROUGHT = {
|
||||
"output":
|
||||
f"{START_REASONING}{START_RESPONSE}This is the rest{END_RESPONSE}", #noqa: E501
|
||||
"output": f"{START_REASONING}{START_RESPONSE}This is the rest{END_RESPONSE}", # noqa: E501
|
||||
"reasoning_content": None,
|
||||
"content": "This is the rest",
|
||||
}
|
||||
|
||||
SIMPLE_REASONING = {
|
||||
"output":
|
||||
f"{START_REASONING}This is a reasoning section{START_RESPONSE}This is the rest{END_RESPONSE}", #noqa: E501
|
||||
"output": f"{START_REASONING}This is a reasoning section{START_RESPONSE}This is the rest{END_RESPONSE}", # noqa: E501
|
||||
"reasoning_content": "This is a reasoning section",
|
||||
"content": "This is the rest",
|
||||
}
|
||||
@@ -42,14 +40,12 @@ NO_REASONING = {
|
||||
"content": "This is content",
|
||||
}
|
||||
MULTIPLE_LINES = {
|
||||
"output":
|
||||
f"{START_REASONING}This\nThat{START_RESPONSE}This is the rest\nThat",
|
||||
"output": f"{START_REASONING}This\nThat{START_RESPONSE}This is the rest\nThat",
|
||||
"reasoning_content": "This\nThat",
|
||||
"content": "This is the rest\nThat",
|
||||
}
|
||||
REASONING_WITH_THINK = {
|
||||
"output":
|
||||
f"{START_REASONING}This is a reasoning section{START_RESPONSE}This is the rest", #noqa: E501
|
||||
"output": f"{START_REASONING}This is a reasoning section{START_RESPONSE}This is the rest", # noqa: E501
|
||||
"reasoning_content": "This is a reasoning section",
|
||||
"content": "This is the rest",
|
||||
}
|
||||
@@ -59,8 +55,7 @@ COMPLETE_REASONING_WITH_THINK = {
|
||||
"content": None,
|
||||
}
|
||||
MULTIPLE_LINES_WITH_THINK = {
|
||||
"output":
|
||||
f"{START_REASONING}This\nThat{START_RESPONSE}This is the rest\nThat",
|
||||
"output": f"{START_REASONING}This\nThat{START_RESPONSE}This is the rest\nThat",
|
||||
"reasoning_content": "This\nThat",
|
||||
"content": "This is the rest\nThat",
|
||||
}
|
||||
@@ -122,9 +117,7 @@ TEST_CASES = [
|
||||
NO_REASONING,
|
||||
id="no_reasoning_streaming",
|
||||
),
|
||||
pytest.param(True,
|
||||
NO_REASONING_QUICK_THROUGHT,
|
||||
id="no_reasoning_quick_stream"),
|
||||
pytest.param(True, NO_REASONING_QUICK_THROUGHT, id="no_reasoning_quick_stream"),
|
||||
pytest.param(
|
||||
True,
|
||||
MULTIPLE_LINES,
|
||||
@@ -148,8 +141,9 @@ TEST_CASES = [
|
||||
]
|
||||
|
||||
# Global tokenizer initialization to avoid repeated loading
|
||||
tokenizer = AutoTokenizer.from_pretrained("tencent/Hunyuan-A13B-Instruct",
|
||||
trust_remote_code=True)
|
||||
tokenizer = AutoTokenizer.from_pretrained(
|
||||
"tencent/Hunyuan-A13B-Instruct", trust_remote_code=True
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("streaming, param_dict", TEST_CASES)
|
||||
@@ -162,12 +156,13 @@ def test_reasoning(
|
||||
output_tokens: list[str] = [
|
||||
tokenizer.convert_tokens_to_string([token]) for token in output
|
||||
]
|
||||
parser: ReasoningParser = ReasoningParserManager.get_reasoning_parser(
|
||||
parser_name)(tokenizer)
|
||||
parser: ReasoningParser = ReasoningParserManager.get_reasoning_parser(parser_name)(
|
||||
tokenizer
|
||||
)
|
||||
|
||||
reasoning, content = run_reasoning_extraction(parser,
|
||||
output_tokens,
|
||||
streaming=streaming)
|
||||
reasoning, content = run_reasoning_extraction(
|
||||
parser, output_tokens, streaming=streaming
|
||||
)
|
||||
|
||||
assert reasoning == param_dict["reasoning_content"]
|
||||
assert content == param_dict["content"]
|
||||
|
||||
@@ -3,8 +3,7 @@
|
||||
|
||||
import pytest
|
||||
from mistral_common.tokens.tokenizers.base import SpecialTokens
|
||||
from mistral_common.tokens.tokenizers.tekken import (SpecialTokenInfo,
|
||||
Tekkenizer)
|
||||
from mistral_common.tokens.tokenizers.tekken import SpecialTokenInfo, Tekkenizer
|
||||
|
||||
from tests.reasoning.utils import run_reasoning_extraction_mistral
|
||||
from vllm.reasoning import ReasoningParser, ReasoningParserManager
|
||||
@@ -18,23 +17,27 @@ def mistral_tokenizer():
|
||||
# TODO(Julien): upon model release change to a tokenizer already configured.
|
||||
# =================================================================
|
||||
mistral_tokenizer = MistralTokenizer.from_pretrained(
|
||||
"mistralai/Devstral-Small-2507")
|
||||
"mistralai/Devstral-Small-2507"
|
||||
)
|
||||
assert isinstance(mistral_tokenizer.tokenizer, Tekkenizer)
|
||||
# Add think special tokens to the tokenizer
|
||||
mistral_tokenizer.tokenizer._all_special_tokens[35] = SpecialTokenInfo(
|
||||
rank=35, is_control=True, token_str=SpecialTokens.begin_think.value)
|
||||
rank=35, is_control=True, token_str=SpecialTokens.begin_think.value
|
||||
)
|
||||
mistral_tokenizer.tokenizer._all_special_tokens[36] = SpecialTokenInfo(
|
||||
rank=36, is_control=True, token_str=SpecialTokens.end_think.value)
|
||||
rank=36, is_control=True, token_str=SpecialTokens.end_think.value
|
||||
)
|
||||
mistral_tokenizer.tokenizer._special_tokens_reverse_vocab = {
|
||||
k: v
|
||||
for k, v in
|
||||
mistral_tokenizer.tokenizer._special_tokens_reverse_vocab.items()
|
||||
for k, v in mistral_tokenizer.tokenizer._special_tokens_reverse_vocab.items()
|
||||
if v not in {35, 36}
|
||||
}
|
||||
mistral_tokenizer.tokenizer._special_tokens_reverse_vocab[
|
||||
SpecialTokens.begin_think.value] = 35
|
||||
SpecialTokens.begin_think.value
|
||||
] = 35
|
||||
mistral_tokenizer.tokenizer._special_tokens_reverse_vocab[
|
||||
SpecialTokens.end_think.value] = 36
|
||||
SpecialTokens.end_think.value
|
||||
] = 36
|
||||
mistral_tokenizer.instruct.BEGIN_THINK = 35
|
||||
mistral_tokenizer.instruct.END_THINK = 36
|
||||
# =================================================================
|
||||
@@ -290,39 +293,45 @@ def test_mistral_reasoning(
|
||||
if index_think != -1:
|
||||
output_before_think = output[:index_think]
|
||||
output_tokens += mistral_tokenizer.tokenizer.encode(
|
||||
output_before_think, False, False)
|
||||
output_before_think, False, False
|
||||
)
|
||||
output_tokens += [mistral_tokenizer.instruct.BEGIN_THINK]
|
||||
|
||||
if index_end_think != -1:
|
||||
output_middle = output[index_think + len_think:index_end_think]
|
||||
output_after_think = output[index_end_think + len_end_think:]
|
||||
output_middle = output[index_think + len_think : index_end_think]
|
||||
output_after_think = output[index_end_think + len_end_think :]
|
||||
output_tokens += mistral_tokenizer.tokenizer.encode(
|
||||
output_middle, False, False)
|
||||
output_middle, False, False
|
||||
)
|
||||
output_tokens += [mistral_tokenizer.instruct.END_THINK]
|
||||
output_tokens += mistral_tokenizer.tokenizer.encode(
|
||||
output_after_think, False, False)
|
||||
output_after_think, False, False
|
||||
)
|
||||
else:
|
||||
output_middle = output[index_think + len_think:]
|
||||
output_middle = output[index_think + len_think :]
|
||||
output_tokens += mistral_tokenizer.tokenizer.encode(
|
||||
output_middle, False, False)
|
||||
output_middle, False, False
|
||||
)
|
||||
elif index_end_think != -1:
|
||||
output_before_think = output[:index_end_think]
|
||||
output_after_think = output[index_end_think + len_end_think:]
|
||||
output_after_think = output[index_end_think + len_end_think :]
|
||||
output_tokens += mistral_tokenizer.tokenizer.encode(
|
||||
output_before_think, False, False)
|
||||
output_before_think, False, False
|
||||
)
|
||||
output_tokens += [mistral_tokenizer.instruct.END_THINK]
|
||||
output_tokens += mistral_tokenizer.tokenizer.encode(
|
||||
output_after_think, False, False)
|
||||
output_after_think, False, False
|
||||
)
|
||||
else:
|
||||
output_tokens += mistral_tokenizer.tokenizer.encode(
|
||||
output, False, False)
|
||||
output_tokens += mistral_tokenizer.tokenizer.encode(output, False, False)
|
||||
|
||||
parser: ReasoningParser = ReasoningParserManager.get_reasoning_parser(
|
||||
parser_name)(mistral_tokenizer)
|
||||
parser: ReasoningParser = ReasoningParserManager.get_reasoning_parser(parser_name)(
|
||||
mistral_tokenizer
|
||||
)
|
||||
|
||||
reasoning, content = run_reasoning_extraction_mistral(parser,
|
||||
output_tokens,
|
||||
streaming=streaming)
|
||||
reasoning, content = run_reasoning_extraction_mistral(
|
||||
parser, output_tokens, streaming=streaming
|
||||
)
|
||||
|
||||
assert reasoning == param_dict["reasoning_content"]
|
||||
assert content == param_dict["content"]
|
||||
@@ -335,7 +344,8 @@ def test_mistral_reasoning(
|
||||
if param_dict["content"] is not None:
|
||||
content = parser.extract_content_ids(output_tokens)
|
||||
assert content == mistral_tokenizer.tokenizer.encode(
|
||||
param_dict["content"], bos=False, eos=False)
|
||||
param_dict["content"], bos=False, eos=False
|
||||
)
|
||||
else:
|
||||
content = parser.extract_content_ids(output_tokens)
|
||||
assert content == []
|
||||
|
||||
@@ -18,29 +18,25 @@ NO_REASONING = {
|
||||
}
|
||||
|
||||
NO_REASONING_WITH_NEWLINE = {
|
||||
"output":
|
||||
f"{START_REASONING}\n{END_REASONING}\n\nNo thoughts, head empty!",
|
||||
"output": f"{START_REASONING}\n{END_REASONING}\n\nNo thoughts, head empty!",
|
||||
"reasoning_content": "\n",
|
||||
"content": "\n\nNo thoughts, head empty!",
|
||||
}
|
||||
|
||||
SIMPLE_REASONING = {
|
||||
"output":
|
||||
f"{START_REASONING}This is a reasoning section{END_REASONING}This is the rest", # noqa: E501
|
||||
"output": f"{START_REASONING}This is a reasoning section{END_REASONING}This is the rest", # noqa: E501
|
||||
"reasoning_content": "This is a reasoning section",
|
||||
"content": "This is the rest",
|
||||
}
|
||||
|
||||
SIMPLE_REASONING_WITH_NEWLINE = {
|
||||
"output":
|
||||
f"{START_REASONING} Look!\n\nI'm thinking...{END_REASONING}\nThis is the rest", # noqa: E501
|
||||
"output": f"{START_REASONING} Look!\n\nI'm thinking...{END_REASONING}\nThis is the rest", # noqa: E501
|
||||
"reasoning_content": " Look!\n\nI'm thinking...",
|
||||
"content": "\nThis is the rest",
|
||||
}
|
||||
|
||||
SIMPLE_REASONING_WITH_MULTIPLE_NEWLINES = {
|
||||
"output":
|
||||
f"{START_REASONING}\nLook!\nI'm thinking...\n\n{END_REASONING}\n\n\nThis is the rest", # noqa: E501
|
||||
"output": f"{START_REASONING}\nLook!\nI'm thinking...\n\n{END_REASONING}\n\n\nThis is the rest", # noqa: E501
|
||||
"reasoning_content": "\nLook!\nI'm thinking...\n\n",
|
||||
"content": "\n\n\nThis is the rest",
|
||||
}
|
||||
@@ -52,8 +48,7 @@ NO_REASONING_ONLY_END_THINK = {
|
||||
}
|
||||
|
||||
REASONING_ONLY_END_THINK = {
|
||||
"output":
|
||||
f"The user is asking me not to think.{END_REASONING}No thoughts!",
|
||||
"output": f"The user is asking me not to think.{END_REASONING}No thoughts!",
|
||||
"reasoning_content": "The user is asking me not to think.",
|
||||
"content": "No thoughts!",
|
||||
}
|
||||
@@ -149,9 +144,9 @@ def test_reasoning(
|
||||
parser_cls = ReasoningParserManager.get_reasoning_parser(parser_name)
|
||||
parser: ReasoningParser = parser_cls(tokenizer)
|
||||
|
||||
reasoning, content = run_reasoning_extraction(reasoning_parser=parser,
|
||||
model_output=model_output,
|
||||
streaming=streaming)
|
||||
reasoning, content = run_reasoning_extraction(
|
||||
reasoning_parser=parser, model_output=model_output, streaming=streaming
|
||||
)
|
||||
|
||||
assert reasoning == param_dict["reasoning_content"]
|
||||
assert content == param_dict["content"]
|
||||
|
||||
@@ -50,8 +50,7 @@ COMPLETE_REASONING = {
|
||||
"content": None,
|
||||
}
|
||||
MULTILINE_REASONING = {
|
||||
"output":
|
||||
"<think>This is a reasoning\nsection</think>This is the rest\nThat",
|
||||
"output": "<think>This is a reasoning\nsection</think>This is the rest\nThat",
|
||||
"reasoning_content": "This is a reasoning\nsection",
|
||||
"content": "This is the rest\nThat",
|
||||
}
|
||||
@@ -131,12 +130,13 @@ def test_reasoning(
|
||||
output_tokens: list[str] = [
|
||||
qwen3_tokenizer.convert_tokens_to_string([token]) for token in output
|
||||
]
|
||||
parser: ReasoningParser = ReasoningParserManager.get_reasoning_parser(
|
||||
parser_name)(qwen3_tokenizer)
|
||||
parser: ReasoningParser = ReasoningParserManager.get_reasoning_parser(parser_name)(
|
||||
qwen3_tokenizer
|
||||
)
|
||||
|
||||
reasoning, content = run_reasoning_extraction(parser,
|
||||
output_tokens,
|
||||
streaming=streaming)
|
||||
reasoning, content = run_reasoning_extraction(
|
||||
parser, output_tokens, streaming=streaming
|
||||
)
|
||||
|
||||
assert reasoning == param_dict["reasoning_content"]
|
||||
assert content == param_dict["content"]
|
||||
|
||||
@@ -57,14 +57,10 @@ MULTIPLE_LINES: dict[str, Any] = {
|
||||
"is_reasoning_end": True,
|
||||
}
|
||||
WITH_START_TOKEN: dict[str, Any] = {
|
||||
"output": ("<seed:think>This is a reasoning section"
|
||||
"</seed:think>This is the rest"),
|
||||
"reasoning_content":
|
||||
"This is a reasoning section",
|
||||
"content":
|
||||
"This is the rest",
|
||||
"is_reasoning_end":
|
||||
True,
|
||||
"output": ("<seed:think>This is a reasoning section</seed:think>This is the rest"),
|
||||
"reasoning_content": "This is a reasoning section",
|
||||
"content": "This is the rest",
|
||||
"is_reasoning_end": True,
|
||||
}
|
||||
ONLY_END_TOKEN: dict[str, Any] = {
|
||||
"output": "Some reasoning</seed:think>This is the rest",
|
||||
@@ -96,7 +92,8 @@ def test_simple_reasoning(seedoss_tokenizer, streaming):
|
||||
parser = parser_cls(seedoss_tokenizer)
|
||||
|
||||
reasoning, content = run_reasoning_extraction(
|
||||
parser, [cast(str, SIMPLE_REASONING["output"])], streaming=streaming)
|
||||
parser, [cast(str, SIMPLE_REASONING["output"])], streaming=streaming
|
||||
)
|
||||
|
||||
assert reasoning == SIMPLE_REASONING["reasoning_content"]
|
||||
assert content == SIMPLE_REASONING["content"]
|
||||
@@ -109,7 +106,8 @@ def test_complete_reasoning(seedoss_tokenizer, streaming):
|
||||
parser = parser_cls(seedoss_tokenizer)
|
||||
|
||||
reasoning, content = run_reasoning_extraction(
|
||||
parser, [cast(str, COMPLETE_REASONING["output"])], streaming=streaming)
|
||||
parser, [cast(str, COMPLETE_REASONING["output"])], streaming=streaming
|
||||
)
|
||||
|
||||
assert reasoning == COMPLETE_REASONING["reasoning_content"]
|
||||
assert content == COMPLETE_REASONING["content"]
|
||||
@@ -122,7 +120,8 @@ def test_no_content(seedoss_tokenizer, streaming):
|
||||
parser = parser_cls(seedoss_tokenizer)
|
||||
|
||||
reasoning, content = run_reasoning_extraction(
|
||||
parser, [cast(str, NO_CONTENT["output"])], streaming=streaming)
|
||||
parser, [cast(str, NO_CONTENT["output"])], streaming=streaming
|
||||
)
|
||||
|
||||
assert reasoning == NO_CONTENT["reasoning_content"]
|
||||
assert content == NO_CONTENT["content"]
|
||||
@@ -135,7 +134,8 @@ def test_multiple_lines(seedoss_tokenizer, streaming):
|
||||
parser = parser_cls(seedoss_tokenizer)
|
||||
|
||||
reasoning, content = run_reasoning_extraction(
|
||||
parser, [cast(str, MULTIPLE_LINES["output"])], streaming=streaming)
|
||||
parser, [cast(str, MULTIPLE_LINES["output"])], streaming=streaming
|
||||
)
|
||||
|
||||
assert reasoning == MULTIPLE_LINES["reasoning_content"]
|
||||
assert content == MULTIPLE_LINES["content"]
|
||||
@@ -148,7 +148,8 @@ def test_with_start_token(seedoss_tokenizer, streaming):
|
||||
parser = parser_cls(seedoss_tokenizer)
|
||||
|
||||
reasoning, content = run_reasoning_extraction(
|
||||
parser, [cast(str, WITH_START_TOKEN["output"])], streaming=streaming)
|
||||
parser, [cast(str, WITH_START_TOKEN["output"])], streaming=streaming
|
||||
)
|
||||
|
||||
assert reasoning == WITH_START_TOKEN["reasoning_content"]
|
||||
assert content == WITH_START_TOKEN["content"]
|
||||
@@ -157,14 +158,15 @@ def test_with_start_token(seedoss_tokenizer, streaming):
|
||||
@pytest.mark.parametrize("streaming", [True, False])
|
||||
def test_only_end_token(seedoss_tokenizer, streaming):
|
||||
"""
|
||||
Test reasoning extraction with only end token
|
||||
(SeedOSS typical behavior).
|
||||
Test reasoning extraction with only end token
|
||||
(SeedOSS typical behavior).
|
||||
"""
|
||||
parser_cls = ReasoningParserManager.get_reasoning_parser(parser_name)
|
||||
parser = parser_cls(seedoss_tokenizer)
|
||||
|
||||
reasoning, content = run_reasoning_extraction(
|
||||
parser, [cast(str, ONLY_END_TOKEN["output"])], streaming=streaming)
|
||||
parser, [cast(str, ONLY_END_TOKEN["output"])], streaming=streaming
|
||||
)
|
||||
|
||||
assert reasoning == ONLY_END_TOKEN["reasoning_content"]
|
||||
assert content == ONLY_END_TOKEN["content"]
|
||||
@@ -177,7 +179,8 @@ def test_no_tokens(seedoss_tokenizer, streaming):
|
||||
parser = parser_cls(seedoss_tokenizer)
|
||||
|
||||
reasoning, content = run_reasoning_extraction(
|
||||
parser, [cast(str, NO_TOKENS["output"])], streaming=streaming)
|
||||
parser, [cast(str, NO_TOKENS["output"])], streaming=streaming
|
||||
)
|
||||
|
||||
assert reasoning == NO_TOKENS["reasoning_content"]
|
||||
assert content == NO_TOKENS["content"]
|
||||
@@ -225,13 +228,9 @@ def test_streaming_delta_processing(seedoss_tokenizer):
|
||||
parser = parser_cls(seedoss_tokenizer)
|
||||
|
||||
# Test streaming with incremental tokens
|
||||
deltas = [
|
||||
"Some ", "reasoning ", "content", "</seed:think>", "Final ", "answer"
|
||||
]
|
||||
deltas = ["Some ", "reasoning ", "content", "</seed:think>", "Final ", "answer"]
|
||||
|
||||
reasoning, content = run_reasoning_extraction(parser,
|
||||
deltas,
|
||||
streaming=True)
|
||||
reasoning, content = run_reasoning_extraction(parser, deltas, streaming=True)
|
||||
|
||||
assert reasoning == "Some reasoning content"
|
||||
assert content == "Final answer"
|
||||
|
||||
@@ -3,14 +3,12 @@
|
||||
|
||||
from typing import Optional, Union
|
||||
|
||||
from vllm.entrypoints.openai.protocol import (ChatCompletionRequest,
|
||||
DeltaMessage)
|
||||
from vllm.entrypoints.openai.protocol import ChatCompletionRequest, DeltaMessage
|
||||
from vllm.reasoning import ReasoningParser
|
||||
from vllm.transformers_utils.tokenizers.mistral import MistralTokenizer
|
||||
|
||||
|
||||
class StreamingReasoningReconstructor:
|
||||
|
||||
def __init__(self):
|
||||
self.reasoning_content = None
|
||||
self.other_content = None
|
||||
@@ -19,8 +17,8 @@ class StreamingReasoningReconstructor:
|
||||
# content and the reasoning content should not be present
|
||||
# at the same time
|
||||
assert delta.content is None or delta.reasoning_content is None, (
|
||||
"Both content and reasoning content are present in the "
|
||||
"delta message")
|
||||
"Both content and reasoning content are present in the delta message"
|
||||
)
|
||||
if delta.content is not None:
|
||||
if self.other_content is None:
|
||||
self.other_content = delta.content
|
||||
@@ -51,7 +49,8 @@ def run_reasoning_extraction(
|
||||
)
|
||||
else:
|
||||
reasoning, content = run_reasoning_extraction_nonstreaming(
|
||||
reasoning_parser, model_output, request)
|
||||
reasoning_parser, model_output, request
|
||||
)
|
||||
return reasoning, content
|
||||
|
||||
|
||||
@@ -61,8 +60,9 @@ def run_reasoning_extraction_mistral(
|
||||
request: Union[ChatCompletionRequest, None] = None,
|
||||
streaming: bool = False,
|
||||
) -> tuple[Optional[str], Optional[str]]:
|
||||
assert isinstance(reasoning_parser.model_tokenizer,
|
||||
MistralTokenizer), type(reasoning_parser.model_tokenizer)
|
||||
assert isinstance(reasoning_parser.model_tokenizer, MistralTokenizer), type(
|
||||
reasoning_parser.model_tokenizer
|
||||
)
|
||||
if streaming:
|
||||
reconstructor = run_reasoning_extraction_streaming_mistral(
|
||||
reasoning_parser,
|
||||
@@ -75,9 +75,11 @@ def run_reasoning_extraction_mistral(
|
||||
)
|
||||
else:
|
||||
str_output = reasoning_parser.model_tokenizer.convert_ids_to_tokens(
|
||||
model_output)
|
||||
model_output
|
||||
)
|
||||
reasoning, content = run_reasoning_extraction_nonstreaming(
|
||||
reasoning_parser, str_output, request)
|
||||
reasoning_parser, str_output, request
|
||||
)
|
||||
return reasoning, content
|
||||
|
||||
|
||||
@@ -88,7 +90,8 @@ def run_reasoning_extraction_nonstreaming(
|
||||
) -> tuple[Optional[str], Optional[str]]:
|
||||
request = request or ChatCompletionRequest(messages=[], model="test-model")
|
||||
return reasoning_parser.extract_reasoning_content(
|
||||
model_output=''.join(model_output), request=request)
|
||||
model_output="".join(model_output), request=request
|
||||
)
|
||||
|
||||
|
||||
def run_reasoning_extraction_streaming(
|
||||
@@ -128,16 +131,16 @@ def run_reasoning_extraction_streaming_mistral(
|
||||
model_deltas: list[int],
|
||||
request: Union[ChatCompletionRequest, None] = None,
|
||||
) -> StreamingReasoningReconstructor:
|
||||
assert isinstance(reasoning_parser.model_tokenizer,
|
||||
MistralTokenizer), type(reasoning_parser.model_tokenizer)
|
||||
assert isinstance(reasoning_parser.model_tokenizer, MistralTokenizer), type(
|
||||
reasoning_parser.model_tokenizer
|
||||
)
|
||||
request = request or ChatCompletionRequest(messages=[], model="test-model")
|
||||
reconstructor = StreamingReasoningReconstructor()
|
||||
previous_text = ""
|
||||
previous_tokens: list[int] = []
|
||||
for model_delta in model_deltas:
|
||||
token_delta = [model_delta]
|
||||
delta = reasoning_parser.model_tokenizer.convert_ids_to_tokens(
|
||||
[model_delta])[0]
|
||||
delta = reasoning_parser.model_tokenizer.convert_ids_to_tokens([model_delta])[0]
|
||||
current_text = previous_text + delta
|
||||
current_tokens = previous_tokens + token_delta
|
||||
delta_message = reasoning_parser.extract_reasoning_content_streaming(
|
||||
|
||||
Reference in New Issue
Block a user