[V0 deprecation] Remove _VLLM_V1 suffixes from attention backend names (#25489)

Signed-off-by: Matthew Bonanni <mbonanni@redhat.com>
Signed-off-by: Matthew Bonanni <mbonanni001@gmail.com>
2025-09-25 13:37:50 -04:00
parent 71b25b0d48
commit 3468f17ebe
42 changed files with 131 additions and 174 deletions
--- a/tests/kernels/attention/test_rocm_attention_selector.py
+++ b/tests/kernels/attention/test_rocm_attention_selector.py
@@ -28,7 +28,7 @@ def test_selector(monkeypatch: pytest.MonkeyPatch):
        # Test standard ROCm attention
        backend = get_attn_backend(16, torch.float16, torch.float16, 16, False)
        assert (backend.get_name() == "ROCM_FLASH"
-                or backend.get_name() == "TRITON_ATTN_VLLM_V1")
+                or backend.get_name() == "TRITON_ATTN")

        # MLA test for deepseek related

@@ -40,8 +40,7 @@ def test_selector(monkeypatch: pytest.MonkeyPatch):
                                   16,
                                   False,
                                   use_mla=True)
-        assert (backend.get_name() == "TRITON_MLA"
-                or backend.get_name() == "TRITON_MLA_VLLM_V1")
+        assert backend.get_name() == "TRITON_MLA"

        # If attention backend is None
        # If use_mla is true
@@ -53,8 +52,7 @@ def test_selector(monkeypatch: pytest.MonkeyPatch):
                                   16,
                                   False,
                                   use_mla=True)
-        assert (backend.get_name() == "TRITON_MLA"
-                or backend.get_name() == "TRITON_MLA_VLLM_V1")
+        assert backend.get_name() == "TRITON_MLA"

        # change the attention backend to AITER MLA
        m.setenv(STR_BACKEND_ENV_VAR, "ROCM_AITER_MLA")
@@ -64,8 +62,7 @@ def test_selector(monkeypatch: pytest.MonkeyPatch):
                                   1,
                                   False,
                                   use_mla=True)
-        assert (backend.get_name() == "ROCM_AITER_MLA"
-                or backend.get_name() == "ROCM_AITER_MLA_VLLM_V1")
+        assert backend.get_name() == "ROCM_AITER_MLA"

        # If attention backend is None
        # If use_mla is true
@@ -79,5 +76,4 @@ def test_selector(monkeypatch: pytest.MonkeyPatch):
                                   1,
                                   False,
                                   use_mla=True)
-        assert (backend.get_name() == "ROCM_AITER_MLA"
-                or backend.get_name() == "ROCM_AITER_MLA_VLLM_V1")
+        assert backend.get_name() == "ROCM_AITER_MLA"