[BugFix][V1][ROCm] Triton MLA uses V0 backend on V1 engine (#19067)

Signed-off-by: Tianyuan Wu <Tianyuan.Wu@amd.com>
2025-07-01 16:12:19 +08:00
parent b1c1fe35a5
commit 96453cfa83
5 changed files with 78 additions and 10 deletions
--- a/tests/kernels/attention/test_attention_selector.py
+++ b/tests/kernels/attention/test_attention_selector.py
@@ -106,10 +106,8 @@ def test_env(
                                                   block_size,
                                                   False,
                                                   use_mla=use_mla)
-                        if use_v1 and name != "TRITON_MLA":
-                            assert backend.get_name() == f"{name}_VLLM_V1"
-                        else:
-                            assert backend.get_name() == name
+                        expected = f"{name}_VLLM_V1" if use_v1 else name
+                        assert backend.get_name() == expected
                    else:
                        with pytest.raises(ValueError) as exc_info:
                            get_attn_backend(16,