Convert formatting to use ruff instead of yapf + isort (#26247)
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
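ruff subsumes both tools: "ruff format" takes over yapf's role, and ruff's
isort-compatible "I" lint rules take over import sorting. A rough local
equivalent (a sketch assuming default settings; the repo's actual
configuration lives in its pyproject.toml):

    ruff format .                   # formatting, in place of yapf
    ruff check --select I --fix .   # import sorting, in place of isort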
@@ -11,8 +11,9 @@ from tests.utils import RemoteOpenAIServer
 from vllm.platforms import current_platform

 if not current_platform.is_device_capability(100):
-    pytest.skip("This test only runs on Blackwell GPUs (SM100).",
-                allow_module_level=True)
+    pytest.skip(
+        "This test only runs on Blackwell GPUs (SM100).", allow_module_level=True
+    )

 os.environ["FLASHINFER_NVCC_THREADS"] = "16"

@@ -22,7 +23,6 @@ dummy_hf_overrides = {"num_layers": 4, "num_hidden_layers": 4}


 def can_initialize(model: str, extra_args: Optional[list[str]] = None):
-
     # Server arguments
     extra_args = extra_args if extra_args is not None else []
     server_args = [
@@ -40,10 +40,11 @@ def can_initialize(model: str, extra_args: Optional[list[str]] = None):

     # Launch server and make a simple request
     with RemoteOpenAIServer(
-            model,
-            server_args,
-            max_wait_seconds=1000,  # Due to FlashInfer compile
-            override_hf_configs=dummy_hf_overrides) as server:
+        model,
+        server_args,
+        max_wait_seconds=1000,  # Due to FlashInfer compile
+        override_hf_configs=dummy_hf_overrides,
+    ) as server:
         client = server.get_client()
         # Make a simple request to verify the server works
         completion = client.completions.create(
@@ -59,20 +60,21 @@ def can_initialize(model: str, extra_args: Optional[list[str]] = None):
 ## Llama4 ##


-@pytest.mark.skip(reason=(
-    "RuntimeError: run_moe() Expected a value of type "
-    "'Optional[List[Tensor]]' for argument '_9' but instead found type "
-    "'list'."))
-def test_llama4_fp8_tensor_moe_flashinfer_cutlass(
-        monkeypatch: pytest.MonkeyPatch):
+@pytest.mark.skip(
+    reason=(
+        "RuntimeError: run_moe() Expected a value of type "
+        "'Optional[List[Tensor]]' for argument '_9' but instead found type "
+        "'list'."
+    )
+)
+def test_llama4_fp8_tensor_moe_flashinfer_cutlass(monkeypatch: pytest.MonkeyPatch):
     monkeypatch.setenv("VLLM_USE_FLASHINFER_MOE_FP8", "1")
     monkeypatch.setenv("VLLM_FLASHINFER_MOE_BACKEND", "throughput")
     can_initialize("nvidia/Llama-4-Scout-17B-16E-Instruct-FP8")


 @pytest.mark.skip(reason="Works, but takes too long to run")
-def test_llama4_fp8_tensor_moe_flashinfer_trtllm(
-        monkeypatch: pytest.MonkeyPatch):
+def test_llama4_fp8_tensor_moe_flashinfer_trtllm(monkeypatch: pytest.MonkeyPatch):
     monkeypatch.setenv("VLLM_USE_FLASHINFER_MOE_FP8", "1")
     monkeypatch.setenv("VLLM_FLASHINFER_MOE_BACKEND", "latency")
     can_initialize("nvidia/Llama-4-Scout-17B-16E-Instruct-FP8")
@@ -100,24 +102,25 @@ def test_deepseek_fp8_block_moe_deep_gemm(monkeypatch: pytest.MonkeyPatch):
     can_initialize("deepseek-ai/DeepSeek-V3.1")


-@pytest.mark.skip(reason=("Known issue: lack of kernel support. "
-                          "Expected failure: assert self.block_quant is None"))
-def test_deepseek_fp8_block_moe_flashinfer_cutlass(
-        monkeypatch: pytest.MonkeyPatch):
+@pytest.mark.skip(
+    reason=(
+        "Known issue: lack of kernel support. "
+        "Expected failure: assert self.block_quant is None"
+    )
+)
+def test_deepseek_fp8_block_moe_flashinfer_cutlass(monkeypatch: pytest.MonkeyPatch):
     monkeypatch.setenv("VLLM_USE_FLASHINFER_MOE_FP8", "1")
     monkeypatch.setenv("VLLM_FLASHINFER_MOE_BACKEND", "throughput")
     can_initialize("deepseek-ai/DeepSeek-V3.1")


-def test_deepseek_fp8_block_moe_flashinfer_trtllm(
-        monkeypatch: pytest.MonkeyPatch):
+def test_deepseek_fp8_block_moe_flashinfer_trtllm(monkeypatch: pytest.MonkeyPatch):
     monkeypatch.setenv("VLLM_USE_FLASHINFER_MOE_FP8", "1")
     monkeypatch.setenv("VLLM_FLASHINFER_MOE_BACKEND", "latency")
     can_initialize("deepseek-ai/DeepSeek-V3.1")


-def test_deepseek_nvfp4_moe_flashinfer_cutlass(
-        monkeypatch: pytest.MonkeyPatch):
+def test_deepseek_nvfp4_moe_flashinfer_cutlass(monkeypatch: pytest.MonkeyPatch):
     monkeypatch.setenv("VLLM_USE_FLASHINFER_MOE_FP4", "1")
     monkeypatch.setenv("VLLM_FLASHINFER_MOE_BACKEND", "throughput")
     can_initialize("nvidia/DeepSeek-R1-0528-FP4-v2")
@@ -138,13 +141,11 @@ def test_gptoss_mxfp4bf16_moe_flashinfer(monkeypatch: pytest.MonkeyPatch):
     can_initialize("openai/gpt-oss-20b")


-def test_gptoss_mxfp4mxfp8_moe_flashinfer_cutlass(
-        monkeypatch: pytest.MonkeyPatch):
+def test_gptoss_mxfp4mxfp8_moe_flashinfer_cutlass(monkeypatch: pytest.MonkeyPatch):
     monkeypatch.setenv("VLLM_USE_FLASHINFER_MOE_MXFP4_MXFP8_CUTLASS", "1")
     can_initialize("openai/gpt-oss-20b")


-def test_gptoss_mxfp4mxfp8_moe_flashinfer_trtllm(
-        monkeypatch: pytest.MonkeyPatch):
+def test_gptoss_mxfp4mxfp8_moe_flashinfer_trtllm(monkeypatch: pytest.MonkeyPatch):
     monkeypatch.setenv("VLLM_USE_FLASHINFER_MOE_MXFP4_MXFP8", "1")
     can_initialize("openai/gpt-oss-20b")