Convert formatting to use ruff instead of yapf + isort (#26247)
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
This commit is contained in:
@@ -3,8 +3,7 @@
|
||||
|
||||
import pytest
|
||||
from mistral_common.tokens.tokenizers.base import SpecialTokens
|
||||
from mistral_common.tokens.tokenizers.tekken import (SpecialTokenInfo,
|
||||
Tekkenizer)
|
||||
from mistral_common.tokens.tokenizers.tekken import SpecialTokenInfo, Tekkenizer
|
||||
|
||||
from tests.reasoning.utils import run_reasoning_extraction_mistral
|
||||
from vllm.reasoning import ReasoningParser, ReasoningParserManager
|
||||
@@ -18,23 +17,27 @@ def mistral_tokenizer():
|
||||
# TODO(Julien): upon model release change to a tokenizer already configured.
|
||||
# =================================================================
|
||||
mistral_tokenizer = MistralTokenizer.from_pretrained(
|
||||
"mistralai/Devstral-Small-2507")
|
||||
"mistralai/Devstral-Small-2507"
|
||||
)
|
||||
assert isinstance(mistral_tokenizer.tokenizer, Tekkenizer)
|
||||
# Add think special tokens to the tokenizer
|
||||
mistral_tokenizer.tokenizer._all_special_tokens[35] = SpecialTokenInfo(
|
||||
rank=35, is_control=True, token_str=SpecialTokens.begin_think.value)
|
||||
rank=35, is_control=True, token_str=SpecialTokens.begin_think.value
|
||||
)
|
||||
mistral_tokenizer.tokenizer._all_special_tokens[36] = SpecialTokenInfo(
|
||||
rank=36, is_control=True, token_str=SpecialTokens.end_think.value)
|
||||
rank=36, is_control=True, token_str=SpecialTokens.end_think.value
|
||||
)
|
||||
mistral_tokenizer.tokenizer._special_tokens_reverse_vocab = {
|
||||
k: v
|
||||
for k, v in
|
||||
mistral_tokenizer.tokenizer._special_tokens_reverse_vocab.items()
|
||||
for k, v in mistral_tokenizer.tokenizer._special_tokens_reverse_vocab.items()
|
||||
if v not in {35, 36}
|
||||
}
|
||||
mistral_tokenizer.tokenizer._special_tokens_reverse_vocab[
|
||||
SpecialTokens.begin_think.value] = 35
|
||||
SpecialTokens.begin_think.value
|
||||
] = 35
|
||||
mistral_tokenizer.tokenizer._special_tokens_reverse_vocab[
|
||||
SpecialTokens.end_think.value] = 36
|
||||
SpecialTokens.end_think.value
|
||||
] = 36
|
||||
mistral_tokenizer.instruct.BEGIN_THINK = 35
|
||||
mistral_tokenizer.instruct.END_THINK = 36
|
||||
# =================================================================
|
||||
@@ -290,39 +293,45 @@ def test_mistral_reasoning(
|
||||
if index_think != -1:
|
||||
output_before_think = output[:index_think]
|
||||
output_tokens += mistral_tokenizer.tokenizer.encode(
|
||||
output_before_think, False, False)
|
||||
output_before_think, False, False
|
||||
)
|
||||
output_tokens += [mistral_tokenizer.instruct.BEGIN_THINK]
|
||||
|
||||
if index_end_think != -1:
|
||||
output_middle = output[index_think + len_think:index_end_think]
|
||||
output_after_think = output[index_end_think + len_end_think:]
|
||||
output_middle = output[index_think + len_think : index_end_think]
|
||||
output_after_think = output[index_end_think + len_end_think :]
|
||||
output_tokens += mistral_tokenizer.tokenizer.encode(
|
||||
output_middle, False, False)
|
||||
output_middle, False, False
|
||||
)
|
||||
output_tokens += [mistral_tokenizer.instruct.END_THINK]
|
||||
output_tokens += mistral_tokenizer.tokenizer.encode(
|
||||
output_after_think, False, False)
|
||||
output_after_think, False, False
|
||||
)
|
||||
else:
|
||||
output_middle = output[index_think + len_think:]
|
||||
output_middle = output[index_think + len_think :]
|
||||
output_tokens += mistral_tokenizer.tokenizer.encode(
|
||||
output_middle, False, False)
|
||||
output_middle, False, False
|
||||
)
|
||||
elif index_end_think != -1:
|
||||
output_before_think = output[:index_end_think]
|
||||
output_after_think = output[index_end_think + len_end_think:]
|
||||
output_after_think = output[index_end_think + len_end_think :]
|
||||
output_tokens += mistral_tokenizer.tokenizer.encode(
|
||||
output_before_think, False, False)
|
||||
output_before_think, False, False
|
||||
)
|
||||
output_tokens += [mistral_tokenizer.instruct.END_THINK]
|
||||
output_tokens += mistral_tokenizer.tokenizer.encode(
|
||||
output_after_think, False, False)
|
||||
output_after_think, False, False
|
||||
)
|
||||
else:
|
||||
output_tokens += mistral_tokenizer.tokenizer.encode(
|
||||
output, False, False)
|
||||
output_tokens += mistral_tokenizer.tokenizer.encode(output, False, False)
|
||||
|
||||
parser: ReasoningParser = ReasoningParserManager.get_reasoning_parser(
|
||||
parser_name)(mistral_tokenizer)
|
||||
parser: ReasoningParser = ReasoningParserManager.get_reasoning_parser(parser_name)(
|
||||
mistral_tokenizer
|
||||
)
|
||||
|
||||
reasoning, content = run_reasoning_extraction_mistral(parser,
|
||||
output_tokens,
|
||||
streaming=streaming)
|
||||
reasoning, content = run_reasoning_extraction_mistral(
|
||||
parser, output_tokens, streaming=streaming
|
||||
)
|
||||
|
||||
assert reasoning == param_dict["reasoning_content"]
|
||||
assert content == param_dict["content"]
|
||||
@@ -335,7 +344,8 @@ def test_mistral_reasoning(
|
||||
if param_dict["content"] is not None:
|
||||
content = parser.extract_content_ids(output_tokens)
|
||||
assert content == mistral_tokenizer.tokenizer.encode(
|
||||
param_dict["content"], bos=False, eos=False)
|
||||
param_dict["content"], bos=False, eos=False
|
||||
)
|
||||
else:
|
||||
content = parser.extract_content_ids(output_tokens)
|
||||
assert content == []
|
||||
|
||||
Reference in New Issue
Block a user