Convert formatting to use ruff instead of yapf + isort (#26247)

Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
2025-10-05 15:06:22 +01:00
parent 17edd8a807
commit d6953beb91
1508 changed files with 115244 additions and 94146 deletions
--- a/tests/models/test_transformers.py
+++ b/tests/models/test_transformers.py
@@ -1,6 +1,7 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 """Test the functionality of the Transformers backend."""
+
 from typing import Any, Optional, Union

 import pytest
@@ -60,14 +61,16 @@ def check_implementation(

@pytest.mark.skipif(
    current_platform.is_rocm(),
-    reason="Llama-3.2-1B-Instruct, Ilama-3.2-1B produce memory access fault.")
+    reason="Llama-3.2-1B-Instruct, Ilama-3.2-1B produce memory access fault.",
+)
@pytest.mark.parametrize(
    "model,model_impl",
    [
        ("meta-llama/Llama-3.2-1B-Instruct", "transformers"),
        ("hmellor/Ilama-3.2-1B", "auto"),  # CUSTOM CODE
        ("allenai/OLMoE-1B-7B-0924", "transformers"),  # MoE
-    ])  # trust_remote_code=True by default
+    ],
+)  # trust_remote_code=True by default
 def test_models(
    hf_runner: type[HfRunner],
    vllm_runner: type[VllmRunner],
@@ -77,29 +80,32 @@ def test_models(
 ) -> None:
    import transformers
    from packaging.version import Version
+
    installed = Version(transformers.__version__)
    required = Version("4.57.0.dev0")
    if model == "allenai/OLMoE-1B-7B-0924" and installed < required:
-        pytest.skip("MoE models with the Transformers backend require "
-                    f"transformers>={required}, but got {installed}")
+        pytest.skip(
+            "MoE models with the Transformers backend require "
+            f"transformers>={required}, but got {installed}"
+        )

-    check_implementation(hf_runner,
-                         vllm_runner,
-                         example_prompts,
-                         model,
-                         model_impl=model_impl)
+    check_implementation(
+        hf_runner, vllm_runner, example_prompts, model, model_impl=model_impl
+    )


 def test_hybrid_attention(vllm_runner: type[VllmRunner]) -> None:
    prompts, _, _ = prep_prompts(4, (800, 801))
    kwargs_ref = {"max_model_len": 8192, "enforce_eager": True}
    kwargs_test = {"model_impl": "transformers", **kwargs_ref}
-    check_implementation(vllm_runner,
-                         vllm_runner,
-                         prompts,
-                         model="hmellor/tiny-random-Gemma2ForCausalLM",
-                         kwargs_ref=kwargs_ref,
-                         kwargs_test=kwargs_test)
+    check_implementation(
+        vllm_runner,
+        vllm_runner,
+        prompts,
+        model="hmellor/tiny-random-Gemma2ForCausalLM",
+        kwargs_ref=kwargs_ref,
+        kwargs_test=kwargs_test,
+    )


@multi_gpu_test(num_gpus=2)
@@ -109,23 +115,28 @@ def test_distributed(
    example_prompts,
 ):
    kwargs = {"model_impl": "transformers", "tensor_parallel_size": 2}
-    check_implementation(hf_runner,
-                         vllm_runner,
-                         example_prompts,
-                         "meta-llama/Llama-3.2-1B-Instruct",
-                         kwargs_test=kwargs)
-
-
-@pytest.mark.parametrize("model, quantization_kwargs", [
-    ("TheBloke/TinyLlama-1.1B-Chat-v0.3-AWQ", {}),
-    ("TheBloke/TinyLlama-1.1B-Chat-v0.3-GPTQ", {}),
-    (
+    check_implementation(
+        hf_runner,
+        vllm_runner,
+        example_prompts,
        "meta-llama/Llama-3.2-1B-Instruct",
-        {
-            "quantization": "bitsandbytes",
-        },
-    ),
-])
+        kwargs_test=kwargs,
+    )
+
+
+@pytest.mark.parametrize(
+    "model, quantization_kwargs",
+    [
+        ("TheBloke/TinyLlama-1.1B-Chat-v0.3-AWQ", {}),
+        ("TheBloke/TinyLlama-1.1B-Chat-v0.3-GPTQ", {}),
+        (
+            "meta-llama/Llama-3.2-1B-Instruct",
+            {
+                "quantization": "bitsandbytes",
+            },
+        ),
+    ],
+)
@pytest.mark.parametrize("max_tokens", [32])
@pytest.mark.parametrize("num_logprobs", [5])
 def test_quantization(
@@ -136,27 +147,34 @@ def test_quantization(
    max_tokens: int,
    num_logprobs: int,
 ) -> None:
-    if (current_platform.is_rocm()
-            and quantization_kwargs.get("quantization", "") == "bitsandbytes"):
-        pytest.skip(
-            "bitsandbytes quantization is currently not supported in rocm.")
+    if (
+        current_platform.is_rocm()
+        and quantization_kwargs.get("quantization", "") == "bitsandbytes"
+    ):
+        pytest.skip("bitsandbytes quantization is currently not supported in rocm.")

    with vllm_runner(
-            model, model_impl="auto", enforce_eager=True,
-            **quantization_kwargs) as vllm_model:  # type: ignore[arg-type]
+        model,
+        model_impl="auto",
+        enforce_eager=True,
+        **quantization_kwargs,  # type: ignore[arg-type]
+    ) as vllm_model:
        vllm_outputs = vllm_model.generate_greedy_logprobs(
-            example_prompts, max_tokens=max_tokens, num_logprobs=num_logprobs)
+            example_prompts, max_tokens=max_tokens, num_logprobs=num_logprobs
+        )

    with vllm_runner(
-            model,
-            model_impl="transformers",
-            enforce_eager=True,
-            **quantization_kwargs) as vllm_model:  # type: ignore[arg-type]
+        model,
+        model_impl="transformers",
+        enforce_eager=True,
+        **quantization_kwargs,  # type: ignore[arg-type]
+    ) as vllm_model:
        model_config = vllm_model.llm.llm_engine.model_config
        assert model_config.using_transformers_backend()

        transformers_outputs = vllm_model.generate_greedy_logprobs(
-            example_prompts, max_tokens=max_tokens, num_logprobs=num_logprobs)
+            example_prompts, max_tokens=max_tokens, num_logprobs=num_logprobs
+        )

    check_logprobs_close(
        outputs_0_lst=transformers_outputs,
@@ -172,22 +190,24 @@ def test_quantization(
        # Layers live in `layers`
        "Qwen/Qwen3-Embedding-0.6B",
        # Layers live in `model.layers`
-        "meta-llama/Llama-3.2-1B-Instruct"
+        "meta-llama/Llama-3.2-1B-Instruct",
    ],
 )
 def test_embed_loading(vllm_runner, model):
-    with vllm_runner(model,
-                     max_model_len=1024,
-                     enforce_eager=True,
-                     runner="pooling",
-                     model_impl="transformers") as model_test:
+    with vllm_runner(
+        model,
+        max_model_len=1024,
+        enforce_eager=True,
+        runner="pooling",
+        model_impl="transformers",
+    ) as model_test:
        model_config = model_test.llm.llm_engine.model_config
        assert model_config.using_transformers_backend()


@pytest.mark.parametrize(
-    "arch",
-    ["TransformersEmbeddingModel", "TransformersForSequenceClassification"])
+    "arch", ["TransformersEmbeddingModel", "TransformersForSequenceClassification"]
+)
 def test_pooling(hf_runner, vllm_runner, example_prompts, arch):
    model = get_model(arch)

@@ -202,6 +222,7 @@ def test_pooling(hf_runner, vllm_runner, example_prompts, arch):
        hf_kwargs["is_sentence_transformer"] = True
    elif arch == "TransformersForSequenceClassification":
        from transformers import AutoModelForSequenceClassification
+
        hf_kwargs["auto_cls"] = AutoModelForSequenceClassification

    # The example_prompts has ending "\n", for example:
@@ -212,8 +233,10 @@ def test_pooling(hf_runner, vllm_runner, example_prompts, arch):
    # So we need to strip the input texts to avoid test failing.
    example_prompts = [str(s).strip() for s in example_prompts]

-    with (vllm_runner(model, **vllm_kwargs) as
-          vllm_model, hf_runner(model, **hf_kwargs) as hf_model):
+    with (
+        vllm_runner(model, **vllm_kwargs) as vllm_model,
+        hf_runner(model, **hf_kwargs) as hf_model,
+    ):
        model_config = vllm_model.llm.llm_engine.model_config
        assert model_config.using_transformers_backend()