Convert formatting to use ruff instead of yapf + isort (#26247)

Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
2025-10-05 15:06:22 +01:00
parent 17edd8a807
commit d6953beb91
1508 changed files with 115244 additions and 94146 deletions
--- a/tests/kernels/attention/test_attention_selector.py
+++ b/tests/kernels/attention/test_attention_selector.py
@@ -15,16 +15,18 @@ from vllm.utils import STR_BACKEND_ENV_VAR, STR_FLASH_ATTN_VAL, STR_INVALID_VAL

@pytest.fixture(autouse=True)
 def clear_cache():
-    """Clear lru cache to ensure each test case runs without caching.
-    """
+    """Clear lru cache to ensure each test case runs without caching."""
    _cached_get_attn_backend.cache_clear()


 # Define MLA and non-MLA backends separately
 DEVICE_MLA_BACKENDS = {
    "cuda": [
-        "TRITON_MLA", "FLASHMLA", "FLASHINFER_MLA", "FLASH_ATTN_MLA",
-        "CUTLASS_MLA"
+        "TRITON_MLA",
+        "FLASHMLA",
+        "FLASHINFER_MLA",
+        "FLASH_ATTN_MLA",
+        "CUTLASS_MLA",
    ],
    "hip": ["TRITON_MLA", "ROCM_AITER_MLA"],
    "cpu": [],
@@ -40,7 +42,7 @@ DEVICE_MLA_BLOCK_SIZES = {
    "cuda": [16, 64],  # CUDA supports both standard and extended block sizes
    "hip": [16, 1],  # HIP requires special handling for block_size=1
    # "cpu": [16]  # CPU uses fixed block size from test cases
-    "cpu": []  # FIXME(woosuk): Temporarily disable CPU tests
+    "cpu": [],  # FIXME(woosuk): Temporarily disable CPU tests
 }


@@ -48,12 +50,13 @@ def generate_params():
    params = []
    for use_mla in [True, False]:
        for device in ["cuda", "hip", "cpu"]:
-            backends = DEVICE_MLA_BACKENDS[
-                device] if use_mla else DEVICE_REGULAR_ATTN_BACKENDS[device]
+            backends = (
+                DEVICE_MLA_BACKENDS[device]
+                if use_mla
+                else DEVICE_REGULAR_ATTN_BACKENDS[device]
+            )
            for name in backends:
-                block_sizes = DEVICE_MLA_BLOCK_SIZES[device] if use_mla else [
-                    16
-                ]
+                block_sizes = DEVICE_MLA_BLOCK_SIZES[device] if use_mla else [16]
                for block_size in block_sizes:
                    params.append(
                        pytest.param(
@@ -61,14 +64,13 @@ def generate_params():
                            name,
                            use_mla,
                            block_size,
-                            id=
-                            f"{device}_{name}_mla_{str(use_mla)[0]}_blks{block_size}"
-                        ))
+                            id=f"{device}_{name}_mla_{str(use_mla)[0]}_blks{block_size}",
+                        )
+                    )
    return params


-@pytest.mark.parametrize("device, name, use_mla, block_size",
-                         generate_params())
+@pytest.mark.parametrize("device, name, use_mla, block_size", generate_params())
 def test_env(
    device: str,
    name: str,
@@ -83,14 +85,12 @@ def test_env(
        m.setenv("VLLM_MLA_DISABLE", "1" if use_mla else "0")

        if device == "cpu":
-            with patch("vllm.attention.selector.current_platform",
-                       CpuPlatform()):
+            with patch("vllm.attention.selector.current_platform", CpuPlatform()):
                backend = get_attn_backend(16, torch.float16, None, block_size)
            assert backend.get_name() == "TORCH_SDPA"

        elif device == "hip":
-            with patch("vllm.attention.selector.current_platform",
-                       RocmPlatform()):
+            with patch("vllm.attention.selector.current_platform", RocmPlatform()):
                if use_mla:
                    # ROCm MLA backend logic:
                    # - TRITON_MLA: supported when block_size != 1
@@ -101,44 +101,33 @@ def test_env(
                    if name == "TRITON_MLA" and block_size == 1:
                        # TRITON_MLA doesn't support block_size == 1
                        with pytest.raises(ValueError) as exc_info:
-                            get_attn_backend(16,
-                                             torch.float16,
-                                             None,
-                                             block_size,
-                                             use_mla=use_mla)
-                        assert f"The selected backend, {name}" in str(
-                            exc_info.value)
+                            get_attn_backend(
+                                16, torch.float16, None, block_size, use_mla=use_mla
+                            )
+                        assert f"The selected backend, {name}" in str(exc_info.value)
                    elif name == "ROCM_AITER_MLA" and block_size != 1:
                        # ROCM_AITER_MLA only supports block_size == 1
                        with pytest.raises(ValueError) as exc_info:
-                            get_attn_backend(16,
-                                             torch.float16,
-                                             None,
-                                             block_size,
-                                             use_mla=use_mla)
-                        assert f"The selected backend, {name}" in str(
-                            exc_info.value)
+                            get_attn_backend(
+                                16, torch.float16, None, block_size, use_mla=use_mla
+                            )
+                        assert f"The selected backend, {name}" in str(exc_info.value)
                    else:
                        # Valid backend-block_size combination
-                        backend = get_attn_backend(16,
-                                                   torch.float16,
-                                                   None,
-                                                   block_size,
-                                                   use_mla=use_mla)
+                        backend = get_attn_backend(
+                            16, torch.float16, None, block_size, use_mla=use_mla
+                        )
                        expected = name
                        assert backend.get_name() == expected
                else:
-                    backend = get_attn_backend(16,
-                                               torch.float16,
-                                               None,
-                                               block_size,
-                                               use_mla=use_mla)
+                    backend = get_attn_backend(
+                        16, torch.float16, None, block_size, use_mla=use_mla
+                    )
                    expected = "TRITON_ATTN"
                    assert backend.get_name() == expected

        elif device == "cuda":
-            with patch("vllm.attention.selector.current_platform",
-                       CudaPlatform()):
+            with patch("vllm.attention.selector.current_platform", CudaPlatform()):
                if use_mla:
                    # CUDA MLA backend logic:
                    # - CUTLASS_MLA: only supported with block_size == 128
@@ -152,28 +141,23 @@ def test_env(
                    if name == "CUTLASS_MLA":
                        if block_size != 128:
                            # CUTLASS_MLA only supports block_size == 128
-                            pytest.skip(
-                                "CUTLASS_MLA only supports block_size 128")
+                            pytest.skip("CUTLASS_MLA only supports block_size 128")
                        else:
-                            backend = get_attn_backend(16,
-                                                       torch.float16,
-                                                       None,
-                                                       block_size,
-                                                       use_mla=use_mla)
+                            backend = get_attn_backend(
+                                16, torch.float16, None, block_size, use_mla=use_mla
+                            )
                            expected = "CUTLASS_MLA"
                            assert backend.get_name() == expected
                    elif name == "FLASHINFER_MLA":
                        if block_size not in [32, 64]:
                            # FlashInfer MLA only supports block_size 32 or 64
                            pytest.skip(
-                                "FlashInfer MLA only supports block_size 32 "
-                                "or 64")
+                                "FlashInfer MLA only supports block_size 32 or 64"
+                            )
                        else:
-                            backend = get_attn_backend(16,
-                                                       torch.float16,
-                                                       None,
-                                                       block_size,
-                                                       use_mla=use_mla)
+                            backend = get_attn_backend(
+                                16, torch.float16, None, block_size, use_mla=use_mla
+                            )
                            expected = "FLASHINFER_MLA"
                            assert backend.get_name() == expected
                    elif name == "FLASHMLA":
@@ -182,58 +166,47 @@ def test_env(
                            pytest.skip("FlashMLA only supports block_size 64")
                        else:
                            from vllm.v1.attention.backends.mla.flashmla import (  # noqa: E501
-                                is_flashmla_supported)
+                                is_flashmla_supported,
+                            )
+
                            is_supported, _ = is_flashmla_supported()
                            if not is_supported:
-                                pytest.skip(
-                                    "FlashMLA not supported on this platform")
+                                pytest.skip("FlashMLA not supported on this platform")
                            else:
-                                backend = get_attn_backend(16,
-                                                           torch.float16,
-                                                           None,
-                                                           block_size,
-                                                           use_mla=use_mla)
+                                backend = get_attn_backend(
+                                    16, torch.float16, None, block_size, use_mla=use_mla
+                                )
                                expected = name
                                assert backend.get_name() == expected
                    elif name == "FLASH_ATTN_MLA":
-                        backend = get_attn_backend(16,
-                                                   torch.float16,
-                                                   None,
-                                                   block_size,
-                                                   use_mla=use_mla)
+                        backend = get_attn_backend(
+                            16, torch.float16, None, block_size, use_mla=use_mla
+                        )
                        expected = "FLASH_ATTN_MLA"
                        assert backend.get_name() == expected
                    else:
                        # TRITON_MLA or other fallback
-                        backend = get_attn_backend(16,
-                                                   torch.float16,
-                                                   None,
-                                                   block_size,
-                                                   use_mla=use_mla)
+                        backend = get_attn_backend(
+                            16, torch.float16, None, block_size, use_mla=use_mla
+                        )
                        expected = "TRITON_MLA"
                        assert backend.get_name() == expected
                elif name == "FLASHINFER":
-                    backend = get_attn_backend(16,
-                                               torch.float16,
-                                               None,
-                                               block_size,
-                                               use_mla=use_mla)
+                    backend = get_attn_backend(
+                        16, torch.float16, None, block_size, use_mla=use_mla
+                    )
                    expected = "FLASHINFER"
                    assert backend.get_name() == expected
                elif name == "XFORMERS":
-                    backend = get_attn_backend(32,
-                                               torch.float16,
-                                               None,
-                                               block_size,
-                                               use_mla=use_mla)
+                    backend = get_attn_backend(
+                        32, torch.float16, None, block_size, use_mla=use_mla
+                    )
                    expected = "XFORMERS"
                    assert backend.get_name() == expected
                elif name == "FLASH_ATTN":
-                    backend = get_attn_backend(32,
-                                               torch.float16,
-                                               None,
-                                               block_size,
-                                               use_mla=use_mla)
+                    backend = get_attn_backend(
+                        32, torch.float16, None, block_size, use_mla=use_mla
+                    )
                    expected = "FLASH_ATTN"
                    assert backend.get_name() == expected

@@ -248,14 +221,12 @@ def test_fp32_fallback(
        m.setenv("VLLM_USE_V1", "1")

        if device == "cpu":
-            with patch("vllm.attention.selector.current_platform",
-                       CpuPlatform()):
+            with patch("vllm.attention.selector.current_platform", CpuPlatform()):
                backend = get_attn_backend(16, torch.float32, None, 16)
            assert backend.get_name() == "TORCH_SDPA"

        elif device == "cuda":
-            with patch("vllm.attention.selector.current_platform",
-                       CudaPlatform()):
+            with patch("vllm.attention.selector.current_platform", CudaPlatform()):
                backend = get_attn_backend(16, torch.float32, None, 16)
            assert backend.get_name() == "FLEX_ATTENTION"

@@ -265,16 +236,16 @@ def test_flash_attn(monkeypatch: pytest.MonkeyPatch):
    # TODO: When testing for v1, pipe in `use_v1` as an argument to
    # get_attn_backend

-    pytest.skip("Skipping as current backend selector does not " \
-                "handle fallbacks when a backend is set via env var.")
+    pytest.skip(
+        "Skipping as current backend selector does not "
+        "handle fallbacks when a backend is set via env var."
+    )

    with monkeypatch.context() as m:
        m.setenv(STR_BACKEND_ENV_VAR, STR_FLASH_ATTN_VAL)

        # Unsupported CUDA arch
-        monkeypatch.setattr(torch.cuda,
-                            "get_device_capability",
-                            lambda _=None: (7, 5))
+        monkeypatch.setattr(torch.cuda, "get_device_capability", lambda _=None: (7, 5))
        backend = get_attn_backend(16, torch.float16, None, 16)
        assert backend.get_name() != STR_FLASH_ATTN_VAL

@@ -295,17 +266,17 @@ def test_flash_attn(monkeypatch: pytest.MonkeyPatch):

        # flash-attn is not installed
        import sys
-        original_module = sys.modules.get('vllm_flash_attn')
-        monkeypatch.setitem(sys.modules, 'vllm_flash_attn', None)
+
+        original_module = sys.modules.get("vllm_flash_attn")
+        monkeypatch.setitem(sys.modules, "vllm_flash_attn", None)
        backend = get_attn_backend(16, torch.float16, None, 16)
        assert backend.get_name() != STR_FLASH_ATTN_VAL

        # Restore the original module if it existed
        if original_module is not None:
-            monkeypatch.setitem(sys.modules, 'vllm_flash_attn',
-                                original_module)
+            monkeypatch.setitem(sys.modules, "vllm_flash_attn", original_module)
        else:
-            monkeypatch.delitem(sys.modules, 'vllm_flash_attn', raising=False)
+            monkeypatch.delitem(sys.modules, "vllm_flash_attn", raising=False)

        # Unsupported head size
        backend = get_attn_backend(17, torch.float16, None, 16)
@@ -314,8 +285,10 @@ def test_flash_attn(monkeypatch: pytest.MonkeyPatch):

 def test_invalid_env(monkeypatch: pytest.MonkeyPatch):
    """Test that invalid attention backend names raise ValueError."""
-    with monkeypatch.context() as m, patch(
-            "vllm.attention.selector.current_platform", CudaPlatform()):
+    with (
+        monkeypatch.context() as m,
+        patch("vllm.attention.selector.current_platform", CudaPlatform()),
+    ):
        m.setenv("VLLM_USE_V1", "1")
        m.setenv(STR_BACKEND_ENV_VAR, STR_INVALID_VAL)