[Bug] Fix Batch Invariant MLA test (#28967)

Signed-off-by: yewentao256 <zhyanwentao@126.com>
2025-11-19 16:18:32 -05:00
parent 68d7231991
commit 1607e664f0
2 changed files with 33 additions and 10 deletions
--- a/vllm/model_executor/layers/batch_invariant.py
+++ b/vllm/model_executor/layers/batch_invariant.py
@@ -803,11 +803,11 @@ def override_envs_for_invariance():
        "FLASH_ATTN",  # best supported backend
        "FLASHINFER",
        "FLASH_ATTN_MLA",
-        "FLASHINFER_MLA",
        "TRITON_MLA",
        # Not yet supported MLA backends
        # "FLASHMLA",
        # "FLEX_ATTENTION", # IMA issue even if we disable batch invariance
+        # "FLASHINFER_MLA",  # see https://github.com/vllm-project/vllm/pull/28967
    ]
    if curr_attn_backend not in supported_backends:
        warning = (