Convert formatting to use ruff instead of yapf + isort (#26247)

Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
2025-10-05 15:06:22 +01:00
parent 17edd8a807
commit d6953beb91
1508 changed files with 115244 additions and 94146 deletions
--- a/tests/reasoning/test_base_thinking_reasoning_parser.py
+++ b/tests/reasoning/test_base_thinking_reasoning_parser.py
@@ -44,9 +44,7 @@ def test_tokenizer():
    # Add custom test tokens
    test_tokens = ["<test:think>", "</test:think>", "<alt:start>", "<alt:end>"]
    existing_tokens = set(tokenizer.get_vocab().keys())
-    new_tokens = [
-        token for token in test_tokens if token not in existing_tokens
-    ]
+    new_tokens = [token for token in test_tokens if token not in existing_tokens]
    if new_tokens:
        tokenizer.add_tokens(new_tokens)
    return tokenizer
@@ -54,8 +52,8 @@ def test_tokenizer():

 class TestBaseThinkingReasoningParserInit:
    """
-        Test initialization and basic properties of
-        BaseThinkingReasoningParser.
+    Test initialization and basic properties of
+    BaseThinkingReasoningParser.
    """

    def test_successful_initialization(self, test_tokenizer):
@@ -76,7 +74,6 @@ class TestBaseThinkingReasoningParserInit:

        # Create a parser with tokens not in vocabulary
        class MissingTokenParser(BaseThinkingReasoningParser):
-
            @property
            def start_token(self) -> str:
                return "<missing:start>"
@@ -85,15 +82,15 @@ class TestBaseThinkingReasoningParserInit:
            def end_token(self) -> str:
                return "<missing:end>"

-        with pytest.raises(RuntimeError,
-                           match="could not locate think start/end tokens"):
+        with pytest.raises(
+            RuntimeError, match="could not locate think start/end tokens"
+        ):
            MissingTokenParser(test_tokenizer)

    def test_initialization_with_empty_tokens(self, test_tokenizer):
        """Test that initialization fails with empty token strings."""

        class EmptyTokenParser(BaseThinkingReasoningParser):
-
            @property
            def start_token(self) -> str:
                return ""
@@ -102,8 +99,9 @@ class TestBaseThinkingReasoningParserInit:
            def end_token(self) -> str:
                return ""

-        with pytest.raises(ValueError,
-                           match="start_token and end_token must be defined"):
+        with pytest.raises(
+            ValueError, match="start_token and end_token must be defined"
+        ):
            EmptyTokenParser(test_tokenizer)


@@ -158,10 +156,8 @@ class TestBaseThinkingReasoningParserExtraction:
        parser = TestThinkingReasoningParser(test_tokenizer)
        request = ChatCompletionRequest(messages=[], model="test-model")

-        model_output = ("<test:think>This is reasoning"
-                        "</test:think>This is content")
-        reasoning, content = parser.extract_reasoning_content(
-            model_output, request)
+        model_output = "<test:think>This is reasoning</test:think>This is content"
+        reasoning, content = parser.extract_reasoning_content(model_output, request)

        assert reasoning == "This is reasoning"
        assert content == "This is content"
@@ -171,9 +167,8 @@ class TestBaseThinkingReasoningParserExtraction:
        parser = TestThinkingReasoningParser(test_tokenizer)
        request = ChatCompletionRequest(messages=[], model="test-model")

-        model_output = ("This is reasoning</test:think>This is content")
-        reasoning, content = parser.extract_reasoning_content(
-            model_output, request)
+        model_output = "This is reasoning</test:think>This is content"
+        reasoning, content = parser.extract_reasoning_content(model_output, request)

        assert reasoning == "This is reasoning"
        assert content == "This is content"
@@ -184,8 +179,7 @@ class TestBaseThinkingReasoningParserExtraction:
        request = ChatCompletionRequest(messages=[], model="test-model")

        model_output = "This is just content"
-        reasoning, content = parser.extract_reasoning_content(
-            model_output, request)
+        reasoning, content = parser.extract_reasoning_content(model_output, request)

        assert reasoning == "This is just content"
        assert content is None
@@ -196,8 +190,7 @@ class TestBaseThinkingReasoningParserExtraction:
        request = ChatCompletionRequest(messages=[], model="test-model")

        model_output = ""
-        reasoning, content = parser.extract_reasoning_content(
-            model_output, request)
+        reasoning, content = parser.extract_reasoning_content(model_output, request)

        assert reasoning == ""
        assert content is None
@@ -207,9 +200,8 @@ class TestBaseThinkingReasoningParserExtraction:
        parser = TestThinkingReasoningParser(test_tokenizer)
        request = ChatCompletionRequest(messages=[], model="test-model")

-        model_output = ("<test:think></test:think>")
-        reasoning, content = parser.extract_reasoning_content(
-            model_output, request)
+        model_output = "<test:think></test:think>"
+        reasoning, content = parser.extract_reasoning_content(model_output, request)

        assert reasoning == ""
        assert content is None
@@ -221,19 +213,24 @@ class TestBaseThinkingReasoningParserStreaming:
    @pytest.mark.parametrize("streaming", [True, False])
    def test_simple_reasoning_extraction(self, test_tokenizer, streaming):
        """
-            Test basic reasoning extraction in both
-            streaming and non-streaming modes.
+        Test basic reasoning extraction in both
+        streaming and non-streaming modes.
        """
        parser = TestThinkingReasoningParser(test_tokenizer)

        model_output = [
-            "<test:think>", "Some ", "reasoning ", "content", "</test:think>",
-            "Final ", "answer"
+            "<test:think>",
+            "Some ",
+            "reasoning ",
+            "content",
+            "</test:think>",
+            "Final ",
+            "answer",
        ]

-        reasoning, content = run_reasoning_extraction(parser,
-                                                      model_output,
-                                                      streaming=streaming)
+        reasoning, content = run_reasoning_extraction(
+            parser, model_output, streaming=streaming
+        )

        assert reasoning == "Some reasoning content"
        assert content == "Final answer"
@@ -252,9 +249,7 @@ class TestBaseThinkingReasoningParserStreaming:
            "answer",
        ]

-        reasoning, content = run_reasoning_extraction(parser,
-                                                      deltas,
-                                                      streaming=True)
+        reasoning, content = run_reasoning_extraction(parser, deltas, streaming=True)

        assert reasoning == "Some reasoning content"
        assert content == "Final answer"
@@ -271,9 +266,7 @@ class TestBaseThinkingReasoningParserStreaming:
            "Answer",
        ]

-        reasoning, content = run_reasoning_extraction(parser,
-                                                      deltas,
-                                                      streaming=True)
+        reasoning, content = run_reasoning_extraction(parser, deltas, streaming=True)

        assert reasoning == "Some reasoning"
        assert content == "Answer"
@@ -290,9 +283,7 @@ class TestBaseThinkingReasoningParserStreaming:
            "end",
        ]

-        reasoning, content = run_reasoning_extraction(parser,
-                                                      deltas,
-                                                      streaming=True)
+        reasoning, content = run_reasoning_extraction(parser, deltas, streaming=True)

        assert reasoning == "Some reasoning without end"
        assert content is None
@@ -309,9 +300,7 @@ class TestBaseThinkingReasoningParserStreaming:
            "Final",
        ]

-        reasoning, content = run_reasoning_extraction(parser,
-                                                      deltas,
-                                                      streaming=True)
+        reasoning, content = run_reasoning_extraction(parser, deltas, streaming=True)

        assert reasoning == "Reasoning content"
        assert content == "Final"
@@ -319,29 +308,27 @@ class TestBaseThinkingReasoningParserStreaming:

 class TestBaseThinkingReasoningParserMultipleImplementations:
    """
-        Test that multiple implementations of
-        BaseThinkingReasoningParser work correctly.
+    Test that multiple implementations of
+    BaseThinkingReasoningParser work correctly.
    """

    def test_different_token_implementations(self, test_tokenizer):
        """
-            Test that different implementations 
-            with different tokens work independently.
+        Test that different implementations
+        with different tokens work independently.
        """
        parser1 = TestThinkingReasoningParser(test_tokenizer)
        parser2 = TestThinkingReasoningParserAlt(test_tokenizer)

        # Test parser1
-        model_output1 = ("Reasoning1</test:think>Content1")
-        reasoning1, content1 = run_reasoning_extraction(
-            parser1, [model_output1])
+        model_output1 = "Reasoning1</test:think>Content1"
+        reasoning1, content1 = run_reasoning_extraction(parser1, [model_output1])
        assert reasoning1 == "Reasoning1"
        assert content1 == "Content1"

        # Test parser2
        model_output2 = "Reasoning2<alt:end>Content2"
-        reasoning2, content2 = run_reasoning_extraction(
-            parser2, [model_output2])
+        reasoning2, content2 = run_reasoning_extraction(parser2, [model_output2])
        assert reasoning2 == "Reasoning2"
        assert content2 == "Content2"

@@ -359,7 +346,7 @@ class TestBaseThinkingReasoningParserEdgeCases:
        """Test behavior with multiple end tokens."""
        parser = TestThinkingReasoningParser(test_tokenizer)

-        model_output = ("First</test:think>Middle</test:think>Last")
+        model_output = "First</test:think>Middle</test:think>Last"
        reasoning, content = run_reasoning_extraction(parser, [model_output])

        # Should stop at first end token
@@ -370,8 +357,7 @@ class TestBaseThinkingReasoningParserEdgeCases:
        """Test behavior with nested-like token patterns."""
        parser = TestThinkingReasoningParser(test_tokenizer)

-        model_output = ("<test:think>Outer"
-                        "<test:think>Inner</test:think>Content")
+        model_output = "<test:think>Outer<test:think>Inner</test:think>Content"
        reasoning, content = run_reasoning_extraction(parser, [model_output])

        # Should process normally, start from first start token
@@ -382,11 +368,9 @@ class TestBaseThinkingReasoningParserEdgeCases:
        """Test behavior with malformed token-like strings."""
        parser = TestThinkingReasoningParser(test_tokenizer)

-        model_output = ("<test:thinking>Not a real token"
-                        "</test:thinking>Content")
+        model_output = "<test:thinking>Not a real token</test:thinking>Content"
        reasoning, content = run_reasoning_extraction(parser, [model_output])

        # Should treat as regular content since tokens don't match exactly
-        assert reasoning == ("<test:thinking>Not a real token"
-                             "</test:thinking>Content")
+        assert reasoning == ("<test:thinking>Not a real token</test:thinking>Content")
        assert content is None
--- a/tests/reasoning/test_deepseekr1_reasoning_parser.py
+++ b/tests/reasoning/test_deepseekr1_reasoning_parser.py
@@ -259,15 +259,15 @@ def test_reasoning(
    output = deepseek_r1_qwen_tokenizer.tokenize(param_dict["output"])
    # decode everything to tokens
    output_tokens: list[str] = [
-        deepseek_r1_qwen_tokenizer.convert_tokens_to_string([token])
-        for token in output
+        deepseek_r1_qwen_tokenizer.convert_tokens_to_string([token]) for token in output
    ]
-    parser: ReasoningParser = ReasoningParserManager.get_reasoning_parser(
-        parser_name)(deepseek_r1_qwen_tokenizer)
+    parser: ReasoningParser = ReasoningParserManager.get_reasoning_parser(parser_name)(
+        deepseek_r1_qwen_tokenizer
+    )

-    reasoning, content = run_reasoning_extraction(parser,
-                                                  output_tokens,
-                                                  streaming=streaming)
+    reasoning, content = run_reasoning_extraction(
+        parser, output_tokens, streaming=streaming
+    )

    assert reasoning == param_dict["reasoning_content"]
    assert content == param_dict["content"]
@@ -281,7 +281,8 @@ def test_reasoning(
    if param_dict["content"] is not None:
        content = parser.extract_content_ids(output_ids)
        assert content == deepseek_r1_qwen_tokenizer.convert_tokens_to_ids(
-            deepseek_r1_qwen_tokenizer.tokenize(param_dict["content"]))
+            deepseek_r1_qwen_tokenizer.tokenize(param_dict["content"])
+        )
    else:
        content = parser.extract_content_ids(output)
        assert content == []
--- a/tests/reasoning/test_glm4_moe_reasoning_parser.py
+++ b/tests/reasoning/test_glm4_moe_reasoning_parser.py
@@ -54,8 +54,7 @@ COMPLETE_REASONING = {
    "is_reasoning_end": True,
 }
 MULTILINE_REASONING = {
-    "output":
-    "<think>This is a reasoning\nsection</think>This is the rest\nThat",
+    "output": "<think>This is a reasoning\nsection</think>This is the rest\nThat",
    "reasoning_content": "This is a reasoning\nsection",
    "content": "This is the rest\nThat",
    "is_reasoning_end": True,
@@ -158,12 +157,12 @@ The capital of Chile is Santiago."""
 REASONING_END_TEST_CASES = [
    pytest.param(STILL_REASONING_PROMPT, False, id="still_reasoning"),
    pytest.param(DONE_REASONING_PROMPT, True, id="done_reasoning"),
-    pytest.param(MULTI_TURN_STILL_REASONING_PROMPT,
-                 False,
-                 id="multi_turn_still_reasoning"),
-    pytest.param(MULTI_TURN_DONE_REASONING_PROMPT,
-                 True,
-                 id="multi_turn_done_reasoning")
+    pytest.param(
+        MULTI_TURN_STILL_REASONING_PROMPT, False, id="multi_turn_still_reasoning"
+    ),
+    pytest.param(
+        MULTI_TURN_DONE_REASONING_PROMPT, True, id="multi_turn_done_reasoning"
+    ),
 ]


@@ -177,12 +176,13 @@ def test_reasoning(
    output_tokens: list[str] = [
        glm45_tokenizer.convert_tokens_to_string([token]) for token in output
    ]
-    parser: ReasoningParser = ReasoningParserManager.get_reasoning_parser(
-        parser_name)(glm45_tokenizer)
+    parser: ReasoningParser = ReasoningParserManager.get_reasoning_parser(parser_name)(
+        glm45_tokenizer
+    )

-    reasoning, content = run_reasoning_extraction(parser,
-                                                  output_tokens,
-                                                  streaming=streaming)
+    reasoning, content = run_reasoning_extraction(
+        parser, output_tokens, streaming=streaming
+    )

    assert reasoning == param_dict["reasoning_content"]
    assert content == param_dict["content"]
@@ -193,10 +193,12 @@ def test_reasoning(


@pytest.mark.parametrize("prompt, is_reasoning_end", REASONING_END_TEST_CASES)
-def test_is_reasoning_end_full_prompt(prompt: str, is_reasoning_end: bool,
-                                      glm45_tokenizer):
-    parser: ReasoningParser = ReasoningParserManager.get_reasoning_parser(
-        parser_name)(glm45_tokenizer)
+def test_is_reasoning_end_full_prompt(
+    prompt: str, is_reasoning_end: bool, glm45_tokenizer
+):
+    parser: ReasoningParser = ReasoningParserManager.get_reasoning_parser(parser_name)(
+        glm45_tokenizer
+    )
    tokens = glm45_tokenizer.tokenize(prompt)
    token_ids = glm45_tokenizer.convert_tokens_to_ids(tokens)
    check_is_reasoning_end = parser.is_reasoning_end(token_ids)
--- a/tests/reasoning/test_granite_reasoning_parser.py
+++ b/tests/reasoning/test_granite_reasoning_parser.py
@@ -11,8 +11,7 @@ START_REASONING = "Here is my thought process:"
 START_RESPONSE = "Here is my response:"

 SIMPLE_REASONING = {
-    "output":
-    f"{START_REASONING}This is a reasoning section{START_RESPONSE}This is the rest",  #noqa: E501
+    "output": f"{START_REASONING}This is a reasoning section{START_RESPONSE}This is the rest",  # noqa: E501
    "reasoning_content": "This is a reasoning section",
    "content": "This is the rest",
 }
@@ -27,14 +26,12 @@ NO_REASONING = {
    "content": "This is content",
 }
 MULTIPLE_LINES = {
-    "output":
-    f"{START_REASONING}This\nThat{START_RESPONSE}This is the rest\nThat",
+    "output": f"{START_REASONING}This\nThat{START_RESPONSE}This is the rest\nThat",
    "reasoning_content": "This\nThat",
    "content": "This is the rest\nThat",
 }
 REASONING_WITH_THINK = {
-    "output":
-    f"{START_REASONING}This is a reasoning section{START_RESPONSE}This is the rest",  #noqa: E501
+    "output": f"{START_REASONING}This is a reasoning section{START_RESPONSE}This is the rest",  # noqa: E501
    "reasoning_content": "This is a reasoning section",
    "content": "This is the rest",
 }
@@ -44,8 +41,7 @@ COMPLETE_REASONING_WITH_THINK = {
    "content": None,
 }
 MULTIPLE_LINES_WITH_THINK = {
-    "output":
-    f"{START_REASONING}This\nThat{START_RESPONSE}This is the rest\nThat",
+    "output": f"{START_REASONING}This\nThat{START_RESPONSE}This is the rest\nThat",
    "reasoning_content": "This\nThat",
    "content": "This is the rest\nThat",
 }
@@ -137,12 +133,13 @@ def test_reasoning(
    output_tokens: list[str] = [
        tokenizer.convert_tokens_to_string([token]) for token in output
    ]
-    parser: ReasoningParser = ReasoningParserManager.get_reasoning_parser(
-        parser_name)(tokenizer)
+    parser: ReasoningParser = ReasoningParserManager.get_reasoning_parser(parser_name)(
+        tokenizer
+    )

-    reasoning, content = run_reasoning_extraction(parser,
-                                                  output_tokens,
-                                                  streaming=streaming)
+    reasoning, content = run_reasoning_extraction(
+        parser, output_tokens, streaming=streaming
+    )

    assert reasoning == param_dict["reasoning_content"]
    assert content == param_dict["content"]
@@ -229,18 +226,15 @@ STREAMING_9 = {
 ## The Response is ongoing, and the delta mixes reasoning content / content
 STREAMING_10 = {
    "previous_text": "Here is my thought process: foo",
-    "current_text":
-    "Here is my thought process: foo bar Here is my response: baz",
+    "current_text": "Here is my thought process: foo bar Here is my response: baz",
    "delta_text": " bar Here is my response: baz",
    "reasoning_content": " bar ",
    "content": " baz",
 }
 # The delta text starts a new substring that might be a response special seq
 STREAMING_11 = {
-    "previous_text":
-    "Here is my thought process: This is a reasoning section ",
-    "current_text":
-    "Here is my thought process: This is a reasoning section Here",
+    "previous_text": "Here is my thought process: This is a reasoning section ",
+    "current_text": "Here is my thought process: This is a reasoning section Here",
    "delta_text": "Here",
    "reasoning_content": None,
    "content": None,
@@ -320,14 +314,17 @@ STREAMING_SUBCASES = [
@pytest.mark.parametrize("param_dict", STREAMING_SUBCASES)
 def test_streaming_subcases(param_dict):
    # Get all of the token IDs
-    previous_token_ids = tokenizer.encode(
-        param_dict["previous_text"]
-    ) if param_dict["previous_text"] is not None else []
+    previous_token_ids = (
+        tokenizer.encode(param_dict["previous_text"])
+        if param_dict["previous_text"] is not None
+        else []
+    )
    current_token_ids = tokenizer.encode(param_dict["current_text"])
    delta_token_ids = tokenizer.encode(param_dict["delta_text"])

-    parser: ReasoningParser = ReasoningParserManager.get_reasoning_parser(
-        parser_name)(tokenizer)
+    parser: ReasoningParser = ReasoningParserManager.get_reasoning_parser(parser_name)(
+        tokenizer
+    )

    response = parser.extract_reasoning_content_streaming(
        previous_text=param_dict["previous_text"],
@@ -339,8 +336,7 @@ def test_streaming_subcases(param_dict):
    )
    # Streaming currently expects at least one of reasoning content / content,
    # so the response should return None in that case.
-    if param_dict["reasoning_content"] is None and param_dict[
-            "content"] is None:
+    if param_dict["reasoning_content"] is None and param_dict["content"] is None:
        assert response is None
    else:
        assert isinstance(response, DeltaMessage)
--- a/tests/reasoning/test_hunyuan_reasoning_parser.py
+++ b/tests/reasoning/test_hunyuan_reasoning_parser.py
@@ -13,15 +13,13 @@ START_RESPONSE = "\n</think>\n<answer>\n"
 END_RESPONSE = "\n</answer>"

 NO_REASONING_QUICK_THROUGHT = {
-    "output":
-    f"{START_REASONING}{START_RESPONSE}This is the rest{END_RESPONSE}",  #noqa: E501
+    "output": f"{START_REASONING}{START_RESPONSE}This is the rest{END_RESPONSE}",  # noqa: E501
    "reasoning_content": None,
    "content": "This is the rest",
 }

 SIMPLE_REASONING = {
-    "output":
-    f"{START_REASONING}This is a reasoning section{START_RESPONSE}This is the rest{END_RESPONSE}",  #noqa: E501
+    "output": f"{START_REASONING}This is a reasoning section{START_RESPONSE}This is the rest{END_RESPONSE}",  # noqa: E501
    "reasoning_content": "This is a reasoning section",
    "content": "This is the rest",
 }
@@ -42,14 +40,12 @@ NO_REASONING = {
    "content": "This is content",
 }
 MULTIPLE_LINES = {
-    "output":
-    f"{START_REASONING}This\nThat{START_RESPONSE}This is the rest\nThat",
+    "output": f"{START_REASONING}This\nThat{START_RESPONSE}This is the rest\nThat",
    "reasoning_content": "This\nThat",
    "content": "This is the rest\nThat",
 }
 REASONING_WITH_THINK = {
-    "output":
-    f"{START_REASONING}This is a reasoning section{START_RESPONSE}This is the rest",  #noqa: E501
+    "output": f"{START_REASONING}This is a reasoning section{START_RESPONSE}This is the rest",  # noqa: E501
    "reasoning_content": "This is a reasoning section",
    "content": "This is the rest",
 }
@@ -59,8 +55,7 @@ COMPLETE_REASONING_WITH_THINK = {
    "content": None,
 }
 MULTIPLE_LINES_WITH_THINK = {
-    "output":
-    f"{START_REASONING}This\nThat{START_RESPONSE}This is the rest\nThat",
+    "output": f"{START_REASONING}This\nThat{START_RESPONSE}This is the rest\nThat",
    "reasoning_content": "This\nThat",
    "content": "This is the rest\nThat",
 }
@@ -122,9 +117,7 @@ TEST_CASES = [
        NO_REASONING,
        id="no_reasoning_streaming",
    ),
-    pytest.param(True,
-                 NO_REASONING_QUICK_THROUGHT,
-                 id="no_reasoning_quick_stream"),
+    pytest.param(True, NO_REASONING_QUICK_THROUGHT, id="no_reasoning_quick_stream"),
    pytest.param(
        True,
        MULTIPLE_LINES,
@@ -148,8 +141,9 @@ TEST_CASES = [
 ]

 # Global tokenizer initialization to avoid repeated loading
-tokenizer = AutoTokenizer.from_pretrained("tencent/Hunyuan-A13B-Instruct",
-                                          trust_remote_code=True)
+tokenizer = AutoTokenizer.from_pretrained(
+    "tencent/Hunyuan-A13B-Instruct", trust_remote_code=True
+)


@pytest.mark.parametrize("streaming, param_dict", TEST_CASES)
@@ -162,12 +156,13 @@ def test_reasoning(
    output_tokens: list[str] = [
        tokenizer.convert_tokens_to_string([token]) for token in output
    ]
-    parser: ReasoningParser = ReasoningParserManager.get_reasoning_parser(
-        parser_name)(tokenizer)
+    parser: ReasoningParser = ReasoningParserManager.get_reasoning_parser(parser_name)(
+        tokenizer
+    )

-    reasoning, content = run_reasoning_extraction(parser,
-                                                  output_tokens,
-                                                  streaming=streaming)
+    reasoning, content = run_reasoning_extraction(
+        parser, output_tokens, streaming=streaming
+    )

    assert reasoning == param_dict["reasoning_content"]
    assert content == param_dict["content"]
--- a/tests/reasoning/test_mistral_reasoning_parser.py
+++ b/tests/reasoning/test_mistral_reasoning_parser.py
@@ -3,8 +3,7 @@

 import pytest
 from mistral_common.tokens.tokenizers.base import SpecialTokens
-from mistral_common.tokens.tokenizers.tekken import (SpecialTokenInfo,
-                                                     Tekkenizer)
+from mistral_common.tokens.tokenizers.tekken import SpecialTokenInfo, Tekkenizer

 from tests.reasoning.utils import run_reasoning_extraction_mistral
 from vllm.reasoning import ReasoningParser, ReasoningParserManager
@@ -18,23 +17,27 @@ def mistral_tokenizer():
    # TODO(Julien): upon model release change to a tokenizer already configured.
    # =================================================================
    mistral_tokenizer = MistralTokenizer.from_pretrained(
-        "mistralai/Devstral-Small-2507")
+        "mistralai/Devstral-Small-2507"
+    )
    assert isinstance(mistral_tokenizer.tokenizer, Tekkenizer)
    # Add think special tokens to the tokenizer
    mistral_tokenizer.tokenizer._all_special_tokens[35] = SpecialTokenInfo(
-        rank=35, is_control=True, token_str=SpecialTokens.begin_think.value)
+        rank=35, is_control=True, token_str=SpecialTokens.begin_think.value
+    )
    mistral_tokenizer.tokenizer._all_special_tokens[36] = SpecialTokenInfo(
-        rank=36, is_control=True, token_str=SpecialTokens.end_think.value)
+        rank=36, is_control=True, token_str=SpecialTokens.end_think.value
+    )
    mistral_tokenizer.tokenizer._special_tokens_reverse_vocab = {
        k: v
-        for k, v in
-        mistral_tokenizer.tokenizer._special_tokens_reverse_vocab.items()
+        for k, v in mistral_tokenizer.tokenizer._special_tokens_reverse_vocab.items()
        if v not in {35, 36}
    }
    mistral_tokenizer.tokenizer._special_tokens_reverse_vocab[
-        SpecialTokens.begin_think.value] = 35
+        SpecialTokens.begin_think.value
+    ] = 35
    mistral_tokenizer.tokenizer._special_tokens_reverse_vocab[
-        SpecialTokens.end_think.value] = 36
+        SpecialTokens.end_think.value
+    ] = 36
    mistral_tokenizer.instruct.BEGIN_THINK = 35
    mistral_tokenizer.instruct.END_THINK = 36
    # =================================================================
@@ -290,39 +293,45 @@ def test_mistral_reasoning(
    if index_think != -1:
        output_before_think = output[:index_think]
        output_tokens += mistral_tokenizer.tokenizer.encode(
-            output_before_think, False, False)
+            output_before_think, False, False
+        )
        output_tokens += [mistral_tokenizer.instruct.BEGIN_THINK]

        if index_end_think != -1:
-            output_middle = output[index_think + len_think:index_end_think]
-            output_after_think = output[index_end_think + len_end_think:]
+            output_middle = output[index_think + len_think : index_end_think]
+            output_after_think = output[index_end_think + len_end_think :]
            output_tokens += mistral_tokenizer.tokenizer.encode(
-                output_middle, False, False)
+                output_middle, False, False
+            )
            output_tokens += [mistral_tokenizer.instruct.END_THINK]
            output_tokens += mistral_tokenizer.tokenizer.encode(
-                output_after_think, False, False)
+                output_after_think, False, False
+            )
        else:
-            output_middle = output[index_think + len_think:]
+            output_middle = output[index_think + len_think :]
            output_tokens += mistral_tokenizer.tokenizer.encode(
-                output_middle, False, False)
+                output_middle, False, False
+            )
    elif index_end_think != -1:
        output_before_think = output[:index_end_think]
-        output_after_think = output[index_end_think + len_end_think:]
+        output_after_think = output[index_end_think + len_end_think :]
        output_tokens += mistral_tokenizer.tokenizer.encode(
-            output_before_think, False, False)
+            output_before_think, False, False
+        )
        output_tokens += [mistral_tokenizer.instruct.END_THINK]
        output_tokens += mistral_tokenizer.tokenizer.encode(
-            output_after_think, False, False)
+            output_after_think, False, False
+        )
    else:
-        output_tokens += mistral_tokenizer.tokenizer.encode(
-            output, False, False)
+        output_tokens += mistral_tokenizer.tokenizer.encode(output, False, False)

-    parser: ReasoningParser = ReasoningParserManager.get_reasoning_parser(
-        parser_name)(mistral_tokenizer)
+    parser: ReasoningParser = ReasoningParserManager.get_reasoning_parser(parser_name)(
+        mistral_tokenizer
+    )

-    reasoning, content = run_reasoning_extraction_mistral(parser,
-                                                          output_tokens,
-                                                          streaming=streaming)
+    reasoning, content = run_reasoning_extraction_mistral(
+        parser, output_tokens, streaming=streaming
+    )

    assert reasoning == param_dict["reasoning_content"]
    assert content == param_dict["content"]
@@ -335,7 +344,8 @@ def test_mistral_reasoning(
    if param_dict["content"] is not None:
        content = parser.extract_content_ids(output_tokens)
        assert content == mistral_tokenizer.tokenizer.encode(
-            param_dict["content"], bos=False, eos=False)
+            param_dict["content"], bos=False, eos=False
+        )
    else:
        content = parser.extract_content_ids(output_tokens)
        assert content == []
--- a/tests/reasoning/test_olmo3_reasoning_parser.py
+++ b/tests/reasoning/test_olmo3_reasoning_parser.py
@@ -18,29 +18,25 @@ NO_REASONING = {
 }

 NO_REASONING_WITH_NEWLINE = {
-    "output":
-    f"{START_REASONING}\n{END_REASONING}\n\nNo thoughts, head empty!",
+    "output": f"{START_REASONING}\n{END_REASONING}\n\nNo thoughts, head empty!",
    "reasoning_content": "\n",
    "content": "\n\nNo thoughts, head empty!",
 }

 SIMPLE_REASONING = {
-    "output":
-    f"{START_REASONING}This is a reasoning section{END_REASONING}This is the rest",  # noqa: E501
+    "output": f"{START_REASONING}This is a reasoning section{END_REASONING}This is the rest",  # noqa: E501
    "reasoning_content": "This is a reasoning section",
    "content": "This is the rest",
 }

 SIMPLE_REASONING_WITH_NEWLINE = {
-    "output":
-    f"{START_REASONING} Look!\n\nI'm thinking...{END_REASONING}\nThis is the rest",  # noqa: E501
+    "output": f"{START_REASONING} Look!\n\nI'm thinking...{END_REASONING}\nThis is the rest",  # noqa: E501
    "reasoning_content": " Look!\n\nI'm thinking...",
    "content": "\nThis is the rest",
 }

 SIMPLE_REASONING_WITH_MULTIPLE_NEWLINES = {
-    "output":
-    f"{START_REASONING}\nLook!\nI'm thinking...\n\n{END_REASONING}\n\n\nThis is the rest",  # noqa: E501
+    "output": f"{START_REASONING}\nLook!\nI'm thinking...\n\n{END_REASONING}\n\n\nThis is the rest",  # noqa: E501
    "reasoning_content": "\nLook!\nI'm thinking...\n\n",
    "content": "\n\n\nThis is the rest",
 }
@@ -52,8 +48,7 @@ NO_REASONING_ONLY_END_THINK = {
 }

 REASONING_ONLY_END_THINK = {
-    "output":
-    f"The user is asking me not to think.{END_REASONING}No thoughts!",
+    "output": f"The user is asking me not to think.{END_REASONING}No thoughts!",
    "reasoning_content": "The user is asking me not to think.",
    "content": "No thoughts!",
 }
@@ -149,9 +144,9 @@ def test_reasoning(
    parser_cls = ReasoningParserManager.get_reasoning_parser(parser_name)
    parser: ReasoningParser = parser_cls(tokenizer)

-    reasoning, content = run_reasoning_extraction(reasoning_parser=parser,
-                                                  model_output=model_output,
-                                                  streaming=streaming)
+    reasoning, content = run_reasoning_extraction(
+        reasoning_parser=parser, model_output=model_output, streaming=streaming
+    )

    assert reasoning == param_dict["reasoning_content"]
    assert content == param_dict["content"]
--- a/tests/reasoning/test_qwen3_reasoning_parser.py
+++ b/tests/reasoning/test_qwen3_reasoning_parser.py
@@ -50,8 +50,7 @@ COMPLETE_REASONING = {
    "content": None,
 }
 MULTILINE_REASONING = {
-    "output":
-    "<think>This is a reasoning\nsection</think>This is the rest\nThat",
+    "output": "<think>This is a reasoning\nsection</think>This is the rest\nThat",
    "reasoning_content": "This is a reasoning\nsection",
    "content": "This is the rest\nThat",
 }
@@ -131,12 +130,13 @@ def test_reasoning(
    output_tokens: list[str] = [
        qwen3_tokenizer.convert_tokens_to_string([token]) for token in output
    ]
-    parser: ReasoningParser = ReasoningParserManager.get_reasoning_parser(
-        parser_name)(qwen3_tokenizer)
+    parser: ReasoningParser = ReasoningParserManager.get_reasoning_parser(parser_name)(
+        qwen3_tokenizer
+    )

-    reasoning, content = run_reasoning_extraction(parser,
-                                                  output_tokens,
-                                                  streaming=streaming)
+    reasoning, content = run_reasoning_extraction(
+        parser, output_tokens, streaming=streaming
+    )

    assert reasoning == param_dict["reasoning_content"]
    assert content == param_dict["content"]
--- a/tests/reasoning/test_seedoss_reasoning_parser.py
+++ b/tests/reasoning/test_seedoss_reasoning_parser.py
@@ -57,14 +57,10 @@ MULTIPLE_LINES: dict[str, Any] = {
    "is_reasoning_end": True,
 }
 WITH_START_TOKEN: dict[str, Any] = {
-    "output": ("<seed:think>This is a reasoning section"
-               "</seed:think>This is the rest"),
-    "reasoning_content":
-    "This is a reasoning section",
-    "content":
-    "This is the rest",
-    "is_reasoning_end":
-    True,
+    "output": ("<seed:think>This is a reasoning section</seed:think>This is the rest"),
+    "reasoning_content": "This is a reasoning section",
+    "content": "This is the rest",
+    "is_reasoning_end": True,
 }
 ONLY_END_TOKEN: dict[str, Any] = {
    "output": "Some reasoning</seed:think>This is the rest",
@@ -96,7 +92,8 @@ def test_simple_reasoning(seedoss_tokenizer, streaming):
    parser = parser_cls(seedoss_tokenizer)

    reasoning, content = run_reasoning_extraction(
-        parser, [cast(str, SIMPLE_REASONING["output"])], streaming=streaming)
+        parser, [cast(str, SIMPLE_REASONING["output"])], streaming=streaming
+    )

    assert reasoning == SIMPLE_REASONING["reasoning_content"]
    assert content == SIMPLE_REASONING["content"]
@@ -109,7 +106,8 @@ def test_complete_reasoning(seedoss_tokenizer, streaming):
    parser = parser_cls(seedoss_tokenizer)

    reasoning, content = run_reasoning_extraction(
-        parser, [cast(str, COMPLETE_REASONING["output"])], streaming=streaming)
+        parser, [cast(str, COMPLETE_REASONING["output"])], streaming=streaming
+    )

    assert reasoning == COMPLETE_REASONING["reasoning_content"]
    assert content == COMPLETE_REASONING["content"]
@@ -122,7 +120,8 @@ def test_no_content(seedoss_tokenizer, streaming):
    parser = parser_cls(seedoss_tokenizer)

    reasoning, content = run_reasoning_extraction(
-        parser, [cast(str, NO_CONTENT["output"])], streaming=streaming)
+        parser, [cast(str, NO_CONTENT["output"])], streaming=streaming
+    )

    assert reasoning == NO_CONTENT["reasoning_content"]
    assert content == NO_CONTENT["content"]
@@ -135,7 +134,8 @@ def test_multiple_lines(seedoss_tokenizer, streaming):
    parser = parser_cls(seedoss_tokenizer)

    reasoning, content = run_reasoning_extraction(
-        parser, [cast(str, MULTIPLE_LINES["output"])], streaming=streaming)
+        parser, [cast(str, MULTIPLE_LINES["output"])], streaming=streaming
+    )

    assert reasoning == MULTIPLE_LINES["reasoning_content"]
    assert content == MULTIPLE_LINES["content"]
@@ -148,7 +148,8 @@ def test_with_start_token(seedoss_tokenizer, streaming):
    parser = parser_cls(seedoss_tokenizer)

    reasoning, content = run_reasoning_extraction(
-        parser, [cast(str, WITH_START_TOKEN["output"])], streaming=streaming)
+        parser, [cast(str, WITH_START_TOKEN["output"])], streaming=streaming
+    )

    assert reasoning == WITH_START_TOKEN["reasoning_content"]
    assert content == WITH_START_TOKEN["content"]
@@ -157,14 +158,15 @@ def test_with_start_token(seedoss_tokenizer, streaming):
@pytest.mark.parametrize("streaming", [True, False])
 def test_only_end_token(seedoss_tokenizer, streaming):
    """
-        Test reasoning extraction with only end token
-        (SeedOSS typical behavior).
+    Test reasoning extraction with only end token
+    (SeedOSS typical behavior).
    """
    parser_cls = ReasoningParserManager.get_reasoning_parser(parser_name)
    parser = parser_cls(seedoss_tokenizer)

    reasoning, content = run_reasoning_extraction(
-        parser, [cast(str, ONLY_END_TOKEN["output"])], streaming=streaming)
+        parser, [cast(str, ONLY_END_TOKEN["output"])], streaming=streaming
+    )

    assert reasoning == ONLY_END_TOKEN["reasoning_content"]
    assert content == ONLY_END_TOKEN["content"]
@@ -177,7 +179,8 @@ def test_no_tokens(seedoss_tokenizer, streaming):
    parser = parser_cls(seedoss_tokenizer)

    reasoning, content = run_reasoning_extraction(
-        parser, [cast(str, NO_TOKENS["output"])], streaming=streaming)
+        parser, [cast(str, NO_TOKENS["output"])], streaming=streaming
+    )

    assert reasoning == NO_TOKENS["reasoning_content"]
    assert content == NO_TOKENS["content"]
@@ -225,13 +228,9 @@ def test_streaming_delta_processing(seedoss_tokenizer):
    parser = parser_cls(seedoss_tokenizer)

    # Test streaming with incremental tokens
-    deltas = [
-        "Some ", "reasoning ", "content", "</seed:think>", "Final ", "answer"
-    ]
+    deltas = ["Some ", "reasoning ", "content", "</seed:think>", "Final ", "answer"]

-    reasoning, content = run_reasoning_extraction(parser,
-                                                  deltas,
-                                                  streaming=True)
+    reasoning, content = run_reasoning_extraction(parser, deltas, streaming=True)

    assert reasoning == "Some reasoning content"
    assert content == "Final answer"
--- a/tests/reasoning/utils.py
+++ b/tests/reasoning/utils.py
@@ -3,14 +3,12 @@

 from typing import Optional, Union

-from vllm.entrypoints.openai.protocol import (ChatCompletionRequest,
-                                              DeltaMessage)
+from vllm.entrypoints.openai.protocol import ChatCompletionRequest, DeltaMessage
 from vllm.reasoning import ReasoningParser
 from vllm.transformers_utils.tokenizers.mistral import MistralTokenizer


 class StreamingReasoningReconstructor:
-
    def __init__(self):
        self.reasoning_content = None
        self.other_content = None
@@ -19,8 +17,8 @@ class StreamingReasoningReconstructor:
        # content and the reasoning content should not be present
        # at the same time
        assert delta.content is None or delta.reasoning_content is None, (
-            "Both content and reasoning content are present in the "
-            "delta message")
+            "Both content and reasoning content are present in the delta message"
+        )
        if delta.content is not None:
            if self.other_content is None:
                self.other_content = delta.content
@@ -51,7 +49,8 @@ def run_reasoning_extraction(
        )
    else:
        reasoning, content = run_reasoning_extraction_nonstreaming(
-            reasoning_parser, model_output, request)
+            reasoning_parser, model_output, request
+        )
        return reasoning, content


@@ -61,8 +60,9 @@ def run_reasoning_extraction_mistral(
    request: Union[ChatCompletionRequest, None] = None,
    streaming: bool = False,
 ) -> tuple[Optional[str], Optional[str]]:
-    assert isinstance(reasoning_parser.model_tokenizer,
-                      MistralTokenizer), type(reasoning_parser.model_tokenizer)
+    assert isinstance(reasoning_parser.model_tokenizer, MistralTokenizer), type(
+        reasoning_parser.model_tokenizer
+    )
    if streaming:
        reconstructor = run_reasoning_extraction_streaming_mistral(
            reasoning_parser,
@@ -75,9 +75,11 @@ def run_reasoning_extraction_mistral(
        )
    else:
        str_output = reasoning_parser.model_tokenizer.convert_ids_to_tokens(
-            model_output)
+            model_output
+        )
        reasoning, content = run_reasoning_extraction_nonstreaming(
-            reasoning_parser, str_output, request)
+            reasoning_parser, str_output, request
+        )
        return reasoning, content


@@ -88,7 +90,8 @@ def run_reasoning_extraction_nonstreaming(
 ) -> tuple[Optional[str], Optional[str]]:
    request = request or ChatCompletionRequest(messages=[], model="test-model")
    return reasoning_parser.extract_reasoning_content(
-        model_output=''.join(model_output), request=request)
+        model_output="".join(model_output), request=request
+    )


 def run_reasoning_extraction_streaming(
@@ -128,16 +131,16 @@ def run_reasoning_extraction_streaming_mistral(
    model_deltas: list[int],
    request: Union[ChatCompletionRequest, None] = None,
 ) -> StreamingReasoningReconstructor:
-    assert isinstance(reasoning_parser.model_tokenizer,
-                      MistralTokenizer), type(reasoning_parser.model_tokenizer)
+    assert isinstance(reasoning_parser.model_tokenizer, MistralTokenizer), type(
+        reasoning_parser.model_tokenizer
+    )
    request = request or ChatCompletionRequest(messages=[], model="test-model")
    reconstructor = StreamingReasoningReconstructor()
    previous_text = ""
    previous_tokens: list[int] = []
    for model_delta in model_deltas:
        token_delta = [model_delta]
-        delta = reasoning_parser.model_tokenizer.convert_ids_to_tokens(
-            [model_delta])[0]
+        delta = reasoning_parser.model_tokenizer.convert_ids_to_tokens([model_delta])[0]
        current_text = previous_text + delta
        current_tokens = previous_tokens + token_delta
        delta_message = reasoning_parser.extract_reasoning_content_streaming(