[FEAT][ROCm]: Support AITER MLA on V1 Engine (#17523)

Signed-off-by: vllmellm <vllm.ellm@embeddedllm.com>
Co-authored-by: qli88 <qiang.li2@amd.com>
Co-authored-by: Hongxia Yang <62075498+hongxiayang@users.noreply.github.com>
Authored by vllmellm on 2025-05-09 10:42:05 +08:00, committed by GitHub
parent 376786fac1
commit 3c9396a64f
10 changed files with 269 additions and 14 deletions

@@ -48,7 +48,8 @@ def test_selector(monkeypatch: pytest.MonkeyPatch):
         m.setenv(STR_BACKEND_ENV_VAR, "ROCM_AITER_MLA")
         backend = get_attn_backend(576, torch.bfloat16, "auto", 1, False,
                                    False, True)
-        assert backend.get_name() == "ROCM_AITER_MLA"
+        assert (backend.get_name() == "ROCM_AITER_MLA"
+                or backend.get_name() == "ROCM_AITER_MLA_VLLM_V1")
         # If attention backend is None
         # If use_mla is true
@@ -58,4 +59,5 @@ def test_selector(monkeypatch: pytest.MonkeyPatch):
         m.setenv("VLLM_ROCM_USE_AITER", "1")
         backend = get_attn_backend(576, torch.bfloat16, "auto", 1, False,
                                    False, True)
-        assert backend.get_name() == "ROCM_AITER_MLA"
+        assert (backend.get_name() == "ROCM_AITER_MLA"
+                or backend.get_name() == "ROCM_AITER_MLA_VLLM_V1")
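
For context, below is a minimal sketch of how the new backend could be selected outside the test, mirroring the environment variables exercised above. The module path vllm.attention.selector, the meaning of the positional arguments, and the VLLM_ATTENTION_BACKEND name behind STR_BACKEND_ENV_VAR are assumptions based on the vLLM code base around this commit, not part of the diff, and may differ across releases.

# Minimal sketch (not part of this commit): select the ROCm AITER MLA backend
# the same way the test above does, via environment variables.
import os

# Opt in to AITER kernels on ROCm and pin the attention backend explicitly.
# VLLM_ATTENTION_BACKEND is assumed to be the variable behind STR_BACKEND_ENV_VAR.
os.environ["VLLM_ROCM_USE_AITER"] = "1"
os.environ["VLLM_ATTENTION_BACKEND"] = "ROCM_AITER_MLA"

import torch
from vllm.attention.selector import get_attn_backend  # assumed module path

# Same arguments as the test: head size 576, bfloat16, "auto" KV-cache dtype,
# block size 1, not attention-free, not block-sparse, use_mla=True.
backend = get_attn_backend(576, torch.bfloat16, "auto", 1, False, False, True)

# On the V1 engine the backend name carries a _VLLM_V1 suffix.
assert backend.get_name() in ("ROCM_AITER_MLA", "ROCM_AITER_MLA_VLLM_V1")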