[V0 deprecation] Remove _VLLM_V1 suffixes from attention backend names (#25489)

Signed-off-by: Matthew Bonanni <mbonanni@redhat.com>
Signed-off-by: Matthew Bonanni <mbonanni001@gmail.com>
This commit is contained in:
Matthew Bonanni
2025-09-25 13:37:50 -04:00
committed by GitHub
parent 71b25b0d48
commit 3468f17ebe
42 changed files with 131 additions and 174 deletions

View File

@@ -28,7 +28,7 @@ def test_selector(monkeypatch: pytest.MonkeyPatch):
# Test standard ROCm attention
backend = get_attn_backend(16, torch.float16, torch.float16, 16, False)
assert (backend.get_name() == "ROCM_FLASH"
-or backend.get_name() == "TRITON_ATTN_VLLM_V1")
+or backend.get_name() == "TRITON_ATTN")
# MLA test for deepseek related
@@ -40,8 +40,7 @@ def test_selector(monkeypatch: pytest.MonkeyPatch):
16,
False,
use_mla=True)
-assert (backend.get_name() == "TRITON_MLA"
-or backend.get_name() == "TRITON_MLA_VLLM_V1")
+assert backend.get_name() == "TRITON_MLA"
# If attention backend is None
# If use_mla is true
@@ -53,8 +52,7 @@ def test_selector(monkeypatch: pytest.MonkeyPatch):
16,
False,
use_mla=True)
-assert (backend.get_name() == "TRITON_MLA"
-or backend.get_name() == "TRITON_MLA_VLLM_V1")
+assert backend.get_name() == "TRITON_MLA"
# change the attention backend to AITER MLA
m.setenv(STR_BACKEND_ENV_VAR, "ROCM_AITER_MLA")
@@ -64,8 +62,7 @@ def test_selector(monkeypatch: pytest.MonkeyPatch):
1,
False,
use_mla=True)
-assert (backend.get_name() == "ROCM_AITER_MLA"
-or backend.get_name() == "ROCM_AITER_MLA_VLLM_V1")
+assert backend.get_name() == "ROCM_AITER_MLA"
# If attention backend is None
# If use_mla is true
@@ -79,5 +76,4 @@ def test_selector(monkeypatch: pytest.MonkeyPatch):
1,
False,
use_mla=True)
-assert (backend.get_name() == "ROCM_AITER_MLA"
-or backend.get_name() == "ROCM_AITER_MLA_VLLM_V1")
+assert backend.get_name() == "ROCM_AITER_MLA"