Convert formatting to use ruff instead of yapf + isort (#26247)
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
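Context for the mechanical changes below: yapf aligned continuation lines under the opening parenthesis, while ruff's formatter is Black-compatible, so calls that fit within the line-length limit are collapsed onto one line and calls that keep a trailing ("magic") comma are expanded to one argument per line. A minimal sketch of the difference, using arguments that appear in the diff but not taken verbatim from any changed file (import sorting, previously isort's job, is presumably covered by ruff's lint rules as well, though that is not visible in these hunks):

```python
from vllm import SamplingParams

# yapf style: continuation lines aligned under the opening parenthesis.
params = SamplingParams(max_tokens=10,
                        temperature=0.0,
                        detokenize=False)

# ruff (Black-compatible) style: collapse when the call fits the line limit...
params = SamplingParams(max_tokens=10, temperature=0.0, detokenize=False)

# ...or keep one argument per line when a magic trailing comma is present.
params = SamplingParams(
    max_tokens=10,
    temperature=0.0,
    detokenize=False,
)
```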
@@ -17,20 +17,16 @@ def test_computed_prefix_blocks(model: str):
     prompt = (
         "You are a helpful assistant. How do I build a car from cardboard and "
         "paper clips? Is there an easy to follow video tutorial available "
-        "online for free?")
+        "online for free?"
+    )

     llm = LLM(model=model)
-    sampling_params = SamplingParams(max_tokens=10,
-                                     temperature=0.0,
-                                     detokenize=False)
+    sampling_params = SamplingParams(max_tokens=10, temperature=0.0, detokenize=False)

-    outputs_no_detokenization = llm.generate(prompt,
-                                             sampling_params)[0].outputs[0]
+    outputs_no_detokenization = llm.generate(prompt, sampling_params)[0].outputs[0]
     sampling_params.detokenize = True
-    outputs_with_detokenization = llm.generate(prompt,
-                                               sampling_params)[0].outputs[0]
+    outputs_with_detokenization = llm.generate(prompt, sampling_params)[0].outputs[0]

-    assert outputs_no_detokenization.text == ''
-    assert outputs_with_detokenization.text != ''
-    assert outputs_no_detokenization.token_ids == \
-           outputs_with_detokenization.token_ids
+    assert outputs_no_detokenization.text == ""
+    assert outputs_with_detokenization.text != ""
+    assert outputs_no_detokenization.token_ids == outputs_with_detokenization.token_ids
@@ -8,15 +8,17 @@ from vllm import SamplingParams
 from vllm.v1.engine import EngineCoreRequest
 from vllm.v1.engine.detokenizer import FastIncrementalDetokenizer

-PROMPT = "Hello, my name is Lee, and I'm a student in the " + \
-         "college of engineering"
+PROMPT = "Hello, my name is Lee, and I'm a student in the " + "college of engineering"


-@pytest.mark.parametrize("min_tokens,stop,truth", [
-    (0, None, " is Lee, and I'm a student in the college of engineering"),
-    (0, "e", " is L"),
-    (5, "e", " is Lee, and I'm a stud"),
-])
+@pytest.mark.parametrize(
+    "min_tokens,stop,truth",
+    [
+        (0, None, " is Lee, and I'm a student in the college of engineering"),
+        (0, "e", " is L"),
+        (5, "e", " is Lee, and I'm a stud"),
+    ],
+)
 def test_min_tokens_with_stop(min_tokens: int, stop: str, truth: str):
     """Test for a specific min_tokens and stop.

@@ -31,16 +33,18 @@ def test_min_tokens_with_stop(min_tokens: int, stop: str, truth: str):
         stop=stop,
         min_tokens=min_tokens,
     )
-    request = EngineCoreRequest(request_id="",
-                                prompt_token_ids=prompt_token_ids,
-                                mm_features=None,
-                                sampling_params=params,
-                                pooling_params=None,
-                                eos_token_id=None,
-                                arrival_time=0.0,
-                                lora_request=None,
-                                cache_salt=None,
-                                data_parallel_rank=None)
+    request = EngineCoreRequest(
+        request_id="",
+        prompt_token_ids=prompt_token_ids,
+        mm_features=None,
+        sampling_params=params,
+        pooling_params=None,
+        eos_token_id=None,
+        arrival_time=0.0,
+        lora_request=None,
+        cache_salt=None,
+        data_parallel_rank=None,
+    )

     detokenizer = FastIncrementalDetokenizer(tokenizer, request)

@@ -31,34 +31,39 @@ def test_stop_reason(vllm_model, example_prompts):
     llm = vllm_model.llm

     # test stop token
-    outputs = llm.generate(example_prompts,
-                           sampling_params=SamplingParams(
-                               ignore_eos=True,
-                               seed=SEED,
-                               max_tokens=MAX_TOKENS,
-                               stop_token_ids=[stop_token_id]))
+    outputs = llm.generate(
+        example_prompts,
+        sampling_params=SamplingParams(
+            ignore_eos=True,
+            seed=SEED,
+            max_tokens=MAX_TOKENS,
+            stop_token_ids=[stop_token_id],
+        ),
+    )
     for output in outputs:
         output = output.outputs[0]
         assert output.finish_reason == "stop"
         assert output.stop_reason == stop_token_id

     # test stop string
-    outputs = llm.generate(example_prompts,
-                           sampling_params=SamplingParams(
-                               ignore_eos=True,
-                               seed=SEED,
-                               max_tokens=MAX_TOKENS,
-                               stop="."))
+    outputs = llm.generate(
+        example_prompts,
+        sampling_params=SamplingParams(
+            ignore_eos=True, seed=SEED, max_tokens=MAX_TOKENS, stop="."
+        ),
+    )
     for output in outputs:
         output = output.outputs[0]
         assert output.finish_reason == "stop"
         assert output.stop_reason == STOP_STR

     # test EOS token
-    outputs = llm.generate(example_prompts,
-                           sampling_params=SamplingParams(
-                               seed=SEED, max_tokens=MAX_TOKENS))
+    outputs = llm.generate(
+        example_prompts,
+        sampling_params=SamplingParams(seed=SEED, max_tokens=MAX_TOKENS),
+    )
     for output in outputs:
         output = output.outputs[0]
         assert output.finish_reason == "length" or (
-            output.finish_reason == "stop" and output.stop_reason is None)
+            output.finish_reason == "stop" and output.stop_reason is None
+        )
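The three blocks above exercise how finish_reason and stop_reason report what ended generation. A condensed, illustrative restatement of that contract (the helper name is invented for this sketch and is not vLLM code):

```python
from typing import Optional, Union


def describe_finish(finish_reason: str, stop_reason: Optional[Union[int, str]]) -> str:
    """Map (finish_reason, stop_reason) to what ended generation, per the asserts above."""
    if finish_reason == "length":
        return "hit max_tokens"
    if finish_reason == "stop":
        if stop_reason is None:
            return "emitted the EOS token"
        if isinstance(stop_reason, int):
            return f"emitted stop token id {stop_reason}"
        return f"matched stop string {stop_reason!r}"
    return "unknown finish_reason"


assert describe_finish("stop", 13013) == "emitted stop token id 13013"
assert describe_finish("stop", ".") == "matched stop string '.'"
assert describe_finish("stop", None) == "emitted the EOS token"
assert describe_finish("length", None) == "hit max_tokens"
```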
@@ -14,7 +14,6 @@ def include_stop_str_in_output(request):


 class _DummyDetokenizer(BaseIncrementalDetokenizer):
-
     def __init__(self, request: EngineCoreRequest):
         super().__init__(request)

@@ -27,7 +26,8 @@ def _make_request(stop, include_stop_str_in_output: bool, min_tokens: int = 0):
     params = SamplingParams(
         stop=stop,
         include_stop_str_in_output=include_stop_str_in_output,
-        min_tokens=min_tokens)
+        min_tokens=min_tokens,
+    )
     # Keep other fields minimal for unit test purposes.
     req = EngineCoreRequest(
         request_id="test",
@@ -44,26 +44,25 @@ def _make_request(stop, include_stop_str_in_output: bool, min_tokens: int = 0):
     return req


-def test_stop_string_while_stop_token_terminates(
-        include_stop_str_in_output: bool):
+def test_stop_string_while_stop_token_terminates(include_stop_str_in_output: bool):
     """
     This test verifies that the detokenizer correctly handles the case where
     the generated token sequence contains both:
     - a stop token
     - an <eos> token

     The detokenizer should respect the stop string and truncate the output
     accordingly.

     Imagine the following sequence:
     - "abcdeZ" is generated, where "Z" is the <eos> token.
     - "cd" is the stop string.

     If include_stop_str_in_output=False, the detokenizer should truncate the
     output to "ab" because the stop string "cd" is excluded.
     If include_stop_str_in_output=True, the detokenizer should include the stop
     string "cd" in the output, resulting in "abcd".

     This verifies the behavioral change introduced in BaseIncrementalDetokenizer
     where stop-string evaluation occurs before the early-return on
@@ -78,8 +77,9 @@ def test_stop_string_while_stop_token_terminates(
     token_ids = [ord(c) for c in generated_text]

     # Create a request with the stop string and initialize the detokenizer.
-    req = _make_request(stop=[stop_string],
-                        include_stop_str_in_output=include_stop_str_in_output)
+    req = _make_request(
+        stop=[stop_string], include_stop_str_in_output=include_stop_str_in_output
+    )
     detok = _DummyDetokenizer(req)

     # Simulate that the last token ('Z') is a stop token (stop_terminated=True).
@@ -99,5 +99,4 @@ def test_stop_string_while_stop_token_terminates(

     # get_next_output_text should return the full text when finished=True.
     # (Buffering only applies during streaming when finished=False.)
-    assert detok.get_next_output_text(finished=True,
-                                      delta=False) == expected_text
+    assert detok.get_next_output_text(finished=True, delta=False) == expected_text
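For readers skimming the diff, the scenario in the docstring of test_stop_string_while_stop_token_terminates above can be restated as a small self-contained sketch. The helper below is illustrative only (plain string slicing, not vLLM's detokenizer) and its name is invented for the example:

```python
def _truncate_at_stop(text: str, stop: str, include_stop_str_in_output: bool) -> str:
    """Illustrative only: mimic how a stop string truncates finished output."""
    idx = text.find(stop)
    if idx == -1:
        return text  # stop string never appeared
    return text[: idx + len(stop)] if include_stop_str_in_output else text[:idx]


# "abcdeZ" is generated ("Z" being the <eos> token) and "cd" is the stop string.
generated = "abcde"  # the <eos> token itself contributes no text
assert _truncate_at_stop(generated, "cd", include_stop_str_in_output=False) == "ab"
assert _truncate_at_stop(generated, "cd", include_stop_str_in_output=True) == "abcd"
```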
@@ -11,12 +11,14 @@ MODEL = "meta-llama/llama-2-7b-hf"
 MAX_TOKENS = 200


-def _test_stopping(llm: LLM,
-                   expected_output: str,
-                   expected_reason: Any,
-                   stop: Optional[list[str]] = None,
-                   stop_token_ids: Optional[list[int]] = None,
-                   include_in_output: bool = False) -> None:
+def _test_stopping(
+    llm: LLM,
+    expected_output: str,
+    expected_reason: Any,
+    stop: Optional[list[str]] = None,
+    stop_token_ids: Optional[list[int]] = None,
+    include_in_output: bool = False,
+) -> None:
     output = llm.generate(
         "A story about vLLM:\n",
         SamplingParams(
@@ -25,7 +27,8 @@ def _test_stopping(llm: LLM,
             stop=stop,
             stop_token_ids=stop_token_ids,
             include_stop_str_in_output=include_in_output,
-        ))[0].outputs[0]
+        ),
+    )[0].outputs[0]

     assert output is not None
     assert output.text == expected_output
@@ -33,17 +36,21 @@ def _test_stopping(llm: LLM,


 def _stop_basic(llm):
-    _test_stopping(llm,
-                   stop=["."],
-                   include_in_output=False,
-                   expected_output="VLLM is a 100% volunteer organization",
-                   expected_reason=".")
+    _test_stopping(
+        llm,
+        stop=["."],
+        include_in_output=False,
+        expected_output="VLLM is a 100% volunteer organization",
+        expected_reason=".",
+    )

-    _test_stopping(llm,
-                   stop=["."],
-                   include_in_output=True,
-                   expected_output="VLLM is a 100% volunteer organization.",
-                   expected_reason=".")
+    _test_stopping(
+        llm,
+        stop=["."],
+        include_in_output=True,
+        expected_output="VLLM is a 100% volunteer organization.",
+        expected_reason=".",
+    )


 def _stop_multi_tokens(llm):
@@ -52,45 +59,54 @@ def _stop_multi_tokens(llm):
         stop=["group of peo", "short"],
         include_in_output=False,
         expected_output="VLLM is a 100% volunteer organization. We are a ",
-        expected_reason="group of peo")
+        expected_reason="group of peo",
+    )

     _test_stopping(
         llm,
         stop=["group of peo", "short"],
         include_in_output=True,
-        expected_output=
-        "VLLM is a 100% volunteer organization. We are a group of peo",
-        expected_reason="group of peo")
+        expected_output="VLLM is a 100% volunteer organization. We are a group of peo",
+        expected_reason="group of peo",
+    )


 def _stop_partial_token(llm):
-    _test_stopping(llm,
-                   stop=["gani"],
-                   include_in_output=False,
-                   expected_output="VLLM is a 100% volunteer or",
-                   expected_reason="gani")
+    _test_stopping(
+        llm,
+        stop=["gani"],
+        include_in_output=False,
+        expected_output="VLLM is a 100% volunteer or",
+        expected_reason="gani",
+    )

-    _test_stopping(llm,
-                   stop=["gani"],
-                   include_in_output=True,
-                   expected_output="VLLM is a 100% volunteer organi",
-                   expected_reason="gani")
+    _test_stopping(
+        llm,
+        stop=["gani"],
+        include_in_output=True,
+        expected_output="VLLM is a 100% volunteer organi",
+        expected_reason="gani",
+    )


 def _stop_token_id(llm):
     # token id 13013 => " organization"

-    _test_stopping(llm,
-                   stop_token_ids=[13013],
-                   include_in_output=False,
-                   expected_output="VLLM is a 100% volunteer",
-                   expected_reason=13013)
+    _test_stopping(
+        llm,
+        stop_token_ids=[13013],
+        include_in_output=False,
+        expected_output="VLLM is a 100% volunteer",
+        expected_reason=13013,
+    )

-    _test_stopping(llm,
-                   stop_token_ids=[13013],
-                   include_in_output=True,
-                   expected_output="VLLM is a 100% volunteer organization",
-                   expected_reason=13013)
+    _test_stopping(
+        llm,
+        stop_token_ids=[13013],
+        include_in_output=True,
+        expected_output="VLLM is a 100% volunteer organization",
+        expected_reason=13013,
+    )


 @pytest.mark.skip_global_cleanup