Convert formatting to use ruff instead of yapf + isort (#26247)
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
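A yapf + isort → ruff conversion of this kind amounts to dropping both tools and enabling ruff's formatter together with its isort-compatible import rules. As a rough sketch only (vLLM's actual rule selection and line length may differ), the pyproject.toml change takes this shape:

    [tool.ruff]
    # One line-length setting replaces the separate yapf and isort configs.
    line-length = 88

    [tool.ruff.lint]
    # "I" is ruff's isort-compatible import-sorting rule family.
    extend-select = ["I"]

    [tool.ruff.format]
    # Enabling the ruff formatter takes over the role yapf used to fill.

Running `ruff format .` plus `ruff check --select I --fix .` then yields mechanical rewrites like the diff below: imports split one name per line with trailing commas, and aligned continuation lines replaced by 4-space-indented argument lists.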
@@ -4,26 +4,33 @@ import pytest
 import torch
 
 from tests.kernels.moe.utils import make_test_quant_config
-from tests.kernels.quantization.nvfp4_utils import (FLOAT4_E2M1_MAX,
-                                                    FLOAT8_E4M3_MAX,
-                                                    dequantize_nvfp4_to_dtype)
+from tests.kernels.quantization.nvfp4_utils import (
+    FLOAT4_E2M1_MAX,
+    FLOAT8_E4M3_MAX,
+    dequantize_nvfp4_to_dtype,
+)
 from tests.kernels.utils import torch_moe
 from vllm import _custom_ops as ops
 from vllm.config import ParallelConfig, VllmConfig, set_current_vllm_config
 from vllm.model_executor.layers.fused_moe.flashinfer_cutlass_moe import (
-    FlashInferExperts, is_valid_flashinfer_cutlass_fused_moe)
+    FlashInferExperts,
+    is_valid_flashinfer_cutlass_fused_moe,
+)
 from vllm.model_executor.layers.fused_moe.fused_moe import fused_topk
-from vllm.model_executor.layers.fused_moe.modular_kernel import (
-    FusedMoEModularKernel)
+from vllm.model_executor.layers.fused_moe.modular_kernel import FusedMoEModularKernel
 from vllm.model_executor.layers.fused_moe.prepare_finalize import (
-    MoEPrepareAndFinalizeNoEP)
+    MoEPrepareAndFinalizeNoEP,
+)
 from vllm.platforms import current_platform
 from vllm.utils.flashinfer import has_flashinfer_cutlass_fused_moe
 
-if not has_flashinfer_cutlass_fused_moe(
-) or not current_platform.has_device_capability(100):
-    pytest.skip("Requires flashinfer_cutlass_fused_moe and nvfp4 support",
-                allow_module_level=True)
+if not has_flashinfer_cutlass_fused_moe() or not current_platform.has_device_capability(
+    100
+):
+    pytest.skip(
+        "Requires flashinfer_cutlass_fused_moe and nvfp4 support",
+        allow_module_level=True,
+    )
 
 MNK_FACTORS = [
     (2, 1024, 1024),
@@ -44,13 +51,13 @@ MNK_FACTORS = [
 @pytest.mark.parametrize("topk", [1, 6, 8])
 @pytest.mark.parametrize("dtype", [torch.half, torch.bfloat16])
 @torch.inference_mode()
-def test_flashinfer_fp4_moe_no_graph(m: int, n: int, k: int, e: int, topk: int,
-                                     dtype: torch.dtype):
+def test_flashinfer_fp4_moe_no_graph(
+    m: int, n: int, k: int, e: int, topk: int, dtype: torch.dtype
+):
     current_platform.seed_everything(7)
     with set_current_vllm_config(
-            VllmConfig(parallel_config=ParallelConfig(
-                pipeline_parallel_size=1))):
-
+        VllmConfig(parallel_config=ParallelConfig(pipeline_parallel_size=1))
+    ):
         a = torch.randn((m, k), device="cuda", dtype=dtype) / 10
 
         quant_blocksize = 16
@@ -66,10 +73,7 @@ def test_flashinfer_fp4_moe_no_graph(m: int, n: int, k: int, e: int, topk: int,
         )
 
         score = torch.randn((m, e), device="cuda", dtype=dtype)
-        topk_weights, topk_ids, _ = fused_topk(a,
-                                               score,
-                                               topk,
-                                               renormalize=False)
+        topk_weights, topk_ids, _ = fused_topk(a, score, topk, renormalize=False)
 
         assert is_valid_flashinfer_cutlass_fused_moe(a, w1_q, w2_q)
 
@@ -87,16 +91,19 @@ def test_flashinfer_fp4_moe_no_graph(m: int, n: int, k: int, e: int, topk: int,
         )
 
         # Reference check:
-        a_global_scale = ((FLOAT8_E4M3_MAX * FLOAT4_E2M1_MAX) /
-                          torch.amax(a.flatten(), dim=-1)).to(torch.float32)
+        a_global_scale = (
+            (FLOAT8_E4M3_MAX * FLOAT4_E2M1_MAX) / torch.amax(a.flatten(), dim=-1)
+        ).to(torch.float32)
         a_fp4, a_scale_interleaved = ops.scaled_fp4_quant(a, a_global_scale)
         _, m_k = a_fp4.shape
-        a_in_dtype = dequantize_nvfp4_to_dtype(a_fp4,
-                                               a_scale_interleaved,
-                                               a_global_scale,
-                                               dtype=a.dtype,
-                                               device=a.device,
-                                               block_size=quant_blocksize)
+        a_in_dtype = dequantize_nvfp4_to_dtype(
+            a_fp4,
+            a_scale_interleaved,
+            a_global_scale,
+            dtype=a.dtype,
+            device=a.device,
+            block_size=quant_blocksize,
+        )
 
         w1_d = torch.empty((e, 2 * n, k), device="cuda", dtype=dtype)
         w2_d = torch.empty((e, k, n), device="cuda", dtype=dtype)
@@ -104,23 +111,26 @@ def test_flashinfer_fp4_moe_no_graph(m: int, n: int, k: int, e: int, topk: int,
         for idx in range(0, e):
             w1_d[idx] = dequantize_nvfp4_to_dtype(
                 w1_q[idx],
-                quant_config.w1_scale[idx], (1 / quant_config.g1_alphas[idx]),
+                quant_config.w1_scale[idx],
+                (1 / quant_config.g1_alphas[idx]),
                 dtype=dtype,
                 device=w1_q.device,
-                block_size=quant_blocksize)
+                block_size=quant_blocksize,
+            )
             w2_d[idx] = dequantize_nvfp4_to_dtype(
                 w2_q[idx],
-                quant_config.w2_scale[idx], (1 / quant_config.g2_alphas[idx]),
+                quant_config.w2_scale[idx],
+                (1 / quant_config.g2_alphas[idx]),
                 dtype=dtype,
                 device=w2_q.device,
-                block_size=quant_blocksize)
+                block_size=quant_blocksize,
+            )
 
         torch_output = torch_moe(a_in_dtype, w1_d, w2_d, score, topk)
 
-        torch.testing.assert_close(torch_output,
-                                   flashinfer_output,
-                                   atol=1e-1,
-                                   rtol=1e-1)
+        torch.testing.assert_close(
+            torch_output, flashinfer_output, atol=1e-1, rtol=1e-1
+        )
 
 
 if __name__ == "__main__":
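Every hunk above is formatting-only; no statement changes behavior. Assuming a standard ruff setup like the sketch above, `ruff format --check .` and `ruff check --select I .` should both pass on the reformatted file with no further edits.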