Convert formatting to use ruff instead of yapf + isort (#26247)

Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
2025-10-05 15:06:22 +01:00
parent 17edd8a807
commit d6953beb91
1508 changed files with 115244 additions and 94146 deletions
--- a/tests/v1/tpu/test_perf.py
+++ b/tests/v1/tpu/test_perf.py
@@ -4,6 +4,7 @@

 Run `pytest tests/v1/tpu/test_perf.py`.
 """
+
 from __future__ import annotations

 import time
@@ -37,7 +38,6 @@ TEST_PARAMS = [
    #   open(/dev/vfio/0): Device or resource busy: Device or resource busy;
    #   Couldn't open iommu group /dev/vfio/0
    # => Investigate
-
    # TestParams(
    #     model="Qwen/Qwen2.5-1.5B-Instruct",
    #     num_prompts=1,
@@ -59,16 +59,14 @@ TEST_PARAMS = [
        num_prompts=64,
        prefix_len=500,
        decode_len=50,
-
        # commit id: ccb246776d93ef105904a8ec015b3587240a1183
        # tpu: v5lite (old vllm CI/CD)
        # expected_avg_time=1.4,
        # err_tol=0.30,
-
        # (This is the active CI/CD instance)
        # commit id: ccb246776d93ef105904a8ec015b3587240a1183
        # tpu: v6e (current vllm CI/CD)
-        expected_avg_time=1.7,  # measured with VLLM_XLA_CACHE_PATH=  
+        expected_avg_time=1.7,  # measured with VLLM_XLA_CACHE_PATH=
        err_tol=0.20,
    ),
 ]
@@ -81,44 +79,50 @@ MAX_NUM_SEQS = 32
 GPU_UTIL = 0.9


-@pytest.mark.skipif(not current_platform.is_tpu(),
-                    reason="This is a basic performance test for TPU only")
+@pytest.mark.skipif(
+    not current_platform.is_tpu(),
+    reason="This is a basic performance test for TPU only",
+)
 @pytest.mark.parametrize("params", TEST_PARAMS)
 def test_perf(
    vllm_runner: type[VllmRunner],
    monkeypatch: pytest.MonkeyPatch,
    params: TestParams,
 ) -> None:
-    tokenizer = get_tokenizer(params.model,
-                              tokenizer_mode="auto",
-                              trust_remote_code=True)
+    tokenizer = get_tokenizer(
+        params.model, tokenizer_mode="auto", trust_remote_code=True
+    )

    prompts = []
    for i in range(params.num_prompts):
-        prefix_token_ids = np.random.randint(0,
-                                             tokenizer.vocab_size,
-                                             size=params.prefix_len).tolist()
+        prefix_token_ids = np.random.randint(
+            0, tokenizer.vocab_size, size=params.prefix_len
+        ).tolist()
        prompt = tokenizer.decode(prefix_token_ids)
        prompts.append(prompt)

    print(
        "-- Running: num_prompts = {} prefix_len = {} decode_len = {}".format(
-            len(prompts), params.prefix_len, params.decode_len))
+            len(prompts), params.prefix_len, params.decode_len
+        )
+    )

    with monkeypatch.context() as m:
        m.setenv("VLLM_USE_V1", "1")

-        sampling_params = SamplingParams(max_tokens=params.decode_len,
-                                         temperature=1.0,
-                                         min_p=0.0)
+        sampling_params = SamplingParams(
+            max_tokens=params.decode_len, temperature=1.0, min_p=0.0
+        )

-        with vllm_runner(params.model,
-                         max_num_batched_tokens=MAX_MODEL_LEN,
-                         max_model_len=MAX_MODEL_LEN,
-                         max_num_seqs=MAX_NUM_SEQS,
-                         gpu_memory_utilization=GPU_UTIL,
-                         enforce_eager=False,
-                         tensor_parallel_size=1) as vllm_model:
+        with vllm_runner(
+            params.model,
+            max_num_batched_tokens=MAX_MODEL_LEN,
+            max_model_len=MAX_MODEL_LEN,
+            max_num_seqs=MAX_NUM_SEQS,
+            gpu_memory_utilization=GPU_UTIL,
+            enforce_eager=False,
+            tensor_parallel_size=1,
+        ) as vllm_model:
            print("  -- Warmup / Compile")
            for i in range(NUM_WARMUPS):
                _ = vllm_model.generate(prompts, sampling_params)
@@ -133,14 +137,18 @@ def test_perf(
            avg_time = sum(times) / len(times)

            print("  -- avg_time = {}".format(avg_time))
-            print("  -- expected_avg_time = {} with err_tol = {}".format(
-                params.expected_avg_time, params.err_tol))
+            print(
+                "  -- expected_avg_time = {} with err_tol = {}".format(
+                    params.expected_avg_time, params.err_tol
+                )
+            )
            diff = avg_time - params.expected_avg_time
            ok = diff < params.err_tol
            if diff < -params.err_tol:
-                print("  !! WARNING !! Performance has improved by {}, "
-                      "it may be necessary to fine-tune the "
-                      "expected_avg_time = {}".format(
-                          -diff, params.expected_avg_time))
+                print(
+                    "  !! WARNING !! Performance has improved by {}, "
+                    "it may be necessary to fine-tune the "
+                    "expected_avg_time = {}".format(-diff, params.expected_avg_time)
+                )

            assert ok, " !! ERROR !! Regression detected"