[FEAT][ROCm]: Support AITER MLA on V1 Engine (#17523)

Signed-off-by: vllmellm <vllm.ellm@embeddedllm.com>
Co-authored-by: qli88 <qiang.li2@amd.com>
Co-authored-by: Hongxia Yang <62075498+hongxiayang@users.noreply.github.com>
Authored by vllmellm on 2025-05-09 10:42:05 +08:00, committed by GitHub
parent 376786fac1
commit 3c9396a64f
10 changed files with 269 additions and 14 deletions

@@ -48,7 +48,8 @@ def test_selector(monkeypatch: pytest.MonkeyPatch):
         m.setenv(STR_BACKEND_ENV_VAR, "ROCM_AITER_MLA")
         backend = get_attn_backend(576, torch.bfloat16, "auto", 1, False,
                                    False, True)
-        assert backend.get_name() == "ROCM_AITER_MLA"
+        assert (backend.get_name() == "ROCM_AITER_MLA"
+                or backend.get_name() == "ROCM_AITER_MLA_VLLM_V1")
         # If attention backend is None
         # If use_mla is true
@@ -58,4 +59,5 @@ def test_selector(monkeypatch: pytest.MonkeyPatch):
         m.setenv("VLLM_ROCM_USE_AITER", "1")
         backend = get_attn_backend(576, torch.bfloat16, "auto", 1, False,
                                    False, True)
-        assert backend.get_name() == "ROCM_AITER_MLA"
+        assert (backend.get_name() == "ROCM_AITER_MLA"
+                or backend.get_name() == "ROCM_AITER_MLA_VLLM_V1")
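
For context, below is a minimal sketch of how the new backend could be selected outside the test, mirroring the environment variables exercised above. The module path vllm.attention.selector, the meaning of the positional arguments, and the VLLM_ATTENTION_BACKEND name behind STR_BACKEND_ENV_VAR are assumptions based on the vLLM code base around this commit, not part of the diff, and may differ across releases.

# Minimal sketch (not part of this commit): select the ROCm AITER MLA backend
# the same way the test above does, via environment variables.
import os

# Opt in to AITER kernels on ROCm and pin the attention backend explicitly.
# VLLM_ATTENTION_BACKEND is assumed to be the variable behind STR_BACKEND_ENV_VAR.
os.environ["VLLM_ROCM_USE_AITER"] = "1"
os.environ["VLLM_ATTENTION_BACKEND"] = "ROCM_AITER_MLA"

import torch
from vllm.attention.selector import get_attn_backend  # assumed module path

# Same arguments as the test: head size 576, bfloat16, "auto" KV-cache dtype,
# block size 1, not attention-free, not block-sparse, use_mla=True.
backend = get_attn_backend(576, torch.bfloat16, "auto", 1, False, False, True)

# On the V1 engine the backend name carries a _VLLM_V1 suffix.
assert backend.get_name() in ("ROCM_AITER_MLA", "ROCM_AITER_MLA_VLLM_V1")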