[V0 deprecation] Remove V0 CPU/XPU/TPU backends (#20412)

Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu>
Signed-off-by: jiang1.li <jiang1.li@intel.com>
Co-authored-by: Li, Jiang <jiang1.li@intel.com>
Author: Woosuk Kwon
Date: 2025-07-06 08:48:13 -07:00 (committed by GitHub)
Parent: 43813e6361
Commit: e202dd2736
20 changed files with 46 additions and 5034 deletions
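The net effect is that CPU attention-backend selection becomes V1-only: tests that used to branch on use_v1 now skip the V0 case up front, and the only name the selector should report on CPU is TORCH_SDPA_VLLM_V1. A minimal sketch of the resulting test pattern, assuming vLLM import paths of this era (vllm.attention.selector, vllm.platforms.cpu) and the get_attn_backend call shape visible in the diff below; this is not a verbatim excerpt of the changed file:

    # Sketch only: the V1-only CPU pattern this commit converges on.
    from unittest.mock import patch

    import pytest
    import torch

    from vllm.attention.selector import get_attn_backend
    from vllm.platforms.cpu import CpuPlatform


    @pytest.mark.parametrize("use_v1", [True, False])
    def test_cpu_selects_v1_sdpa(use_v1, monkeypatch):
        if not use_v1:
            # The V0 CPU backend is removed, so skip rather than assert.
            pytest.skip("CPU backend only supports V1")
        monkeypatch.setenv("VLLM_USE_V1", "1")
        with patch("vllm.attention.selector.current_platform", CpuPlatform()):
            backend = get_attn_backend(16, torch.float16, torch.float16, 16,
                                       False)
            assert backend.get_name() == "TORCH_SDPA_VLLM_V1"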


@@ -36,7 +36,8 @@ DEVICE_REGULAR_ATTN_BACKENDS = {
 DEVICE_MLA_BLOCK_SIZES = {
     "cuda": [16, 64],  # CUDA supports both standard and extended block sizes
     "hip": [16, 1],  # HIP requires special handling for block_size=1
-    "cpu": [16]  # CPU uses fixed block size from test cases
+    # "cpu": [16]  # CPU uses fixed block size from test cases
+    "cpu": []  # FIXME(woosuk): Temporarily disable CPU tests
 }
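For context on why an empty list disables the CPU cases: a table like DEVICE_MLA_BLOCK_SIZES is typically flattened into pytest parameters, so "cpu": [] simply generates zero CPU test cases without touching any test body. A hypothetical sketch (the flattening helper below is illustrative, not the file's actual code):

    import pytest

    DEVICE_MLA_BLOCK_SIZES = {
        "cuda": [16, 64],
        "hip": [16, 1],
        "cpu": [],  # FIXME(woosuk): Temporarily disable CPU tests
    }

    # Flatten into (device, block_size) pairs; "cpu" contributes none.
    MLA_CASES = [(device, size)
                 for device, sizes in DEVICE_MLA_BLOCK_SIZES.items()
                 for size in sizes]


    @pytest.mark.parametrize("device,block_size", MLA_CASES)
    def test_mla_block_size_listed(device, block_size):
        assert block_size in DEVICE_MLA_BLOCK_SIZES[device]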
@@ -81,14 +82,14 @@ def test_env(
     m.setenv("VLLM_MLA_DISABLE", "1" if use_mla else "0")
     if device == "cpu":
+        if not use_v1:
+            pytest.skip("CPU backend only supports V1")
         with patch("vllm.attention.selector.current_platform",
                    CpuPlatform()):
             backend = get_attn_backend(16, torch.float16, torch.float16,
                                        block_size, False)
-            if use_v1:
-                assert backend.get_name() == "TORCH_SDPA_VLLM_V1"
-            else:
-                assert backend.get_name() == "TORCH_SDPA"
+            assert backend.get_name() == "TORCH_SDPA_VLLM_V1"
     elif device == "hip":
         with patch("vllm.attention.selector.current_platform",
@@ -193,12 +194,14 @@ def test_fp32_fallback(
     m.setenv("VLLM_USE_V1", "1" if use_v1 else "0")
     if device == "cpu":
+        if not use_v1:
+            pytest.skip("CPU backend only supports V1")
         with patch("vllm.attention.selector.current_platform",
                    CpuPlatform()):
             backend = get_attn_backend(16, torch.float32, torch.float32,
                                        16, False)
-            assert (backend.get_name() == "TORCH_SDPA_VLLM_V1"
-                    if use_v1 else "TORCH_SDPA")
+            assert backend.get_name() == "TORCH_SDPA_VLLM_V1"
     elif device == "cuda":
         with patch("vllm.attention.selector.current_platform",
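One incidental fix in the last hunk: the deleted assertion wrapped the whole conditional expression in parentheses, so on the V0 path it asserted the non-empty string "TORCH_SDPA", which is always truthy; that branch of the assert could never fail. The collapsed single-comparison form removes the latent no-op. A short demonstration of the old behavior:

    # The removed form asserted a conditional expression, not a comparison:
    assert (1 == 2 if False else "TORCH_SDPA")  # passes: non-empty str is truthy
    # The intended V0-era check would have been:
    # assert backend.get_name() == ("TORCH_SDPA_VLLM_V1" if use_v1 else "TORCH_SDPA")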