[V0 Deprecation] Deprecate BlockSparse Attention & Phi3-Small (#21217)

Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu>
2025-07-19 13:53:17 -07:00
parent 881e3cbe3b
commit 752c6ade2e
38 changed files with 65 additions and 2435 deletions
--- a/tests/kernels/attention/test_rocm_attention_selector.py
+++ b/tests/kernels/attention/test_rocm_attention_selector.py
@@ -33,8 +33,12 @@ def test_selector(monkeypatch: pytest.MonkeyPatch):

        # change the attention backend to triton MLA
        m.setenv(STR_BACKEND_ENV_VAR, "TRITON_MLA")
-        backend = get_attn_backend(576, torch.bfloat16, "auto", 16, False,
-                                   False, True)
+        backend = get_attn_backend(576,
+                                   torch.bfloat16,
+                                   "auto",
+                                   16,
+                                   False,
+                                   use_mla=True)
        assert (backend.get_name() == "TRITON_MLA"
                or backend.get_name() == "TRITON_MLA_VLLM_V1")

@@ -42,15 +46,23 @@ def test_selector(monkeypatch: pytest.MonkeyPatch):
        # If use_mla is true
        # The selected backend is triton MLA
        m.setenv(STR_BACKEND_ENV_VAR, None)
-        backend = get_attn_backend(576, torch.bfloat16, "auto", 16, False,
-                                   False, True)
+        backend = get_attn_backend(576,
+                                   torch.bfloat16,
+                                   "auto",
+                                   16,
+                                   False,
+                                   use_mla=True)
        assert (backend.get_name() == "TRITON_MLA"
                or backend.get_name() == "TRITON_MLA_VLLM_V1")

        # change the attention backend to AITER MLA
        m.setenv(STR_BACKEND_ENV_VAR, "ROCM_AITER_MLA")
-        backend = get_attn_backend(576, torch.bfloat16, "auto", 1, False,
-                                   False, True)
+        backend = get_attn_backend(576,
+                                   torch.bfloat16,
+                                   "auto",
+                                   1,
+                                   False,
+                                   use_mla=True)
        assert (backend.get_name() == "ROCM_AITER_MLA"
                or backend.get_name() == "ROCM_AITER_MLA_VLLM_V1")

@@ -60,7 +72,11 @@ def test_selector(monkeypatch: pytest.MonkeyPatch):
        # The selected backend is ROCM_AITER_MLA
        m.setenv(STR_BACKEND_ENV_VAR, None)
        m.setenv("VLLM_ROCM_USE_AITER", "1")
-        backend = get_attn_backend(576, torch.bfloat16, "auto", 1, False,
-                                   False, True)
+        backend = get_attn_backend(576,
+                                   torch.bfloat16,
+                                   "auto",
+                                   1,
+                                   False,
+                                   use_mla=True)
        assert (backend.get_name() == "ROCM_AITER_MLA"
                or backend.get_name() == "ROCM_AITER_MLA_VLLM_V1")