[Test] Batch Invariant: Unit test using parameterized backend (#27478)

Signed-off-by: yewentao256 <zhyanwentao@126.com>
This commit is contained in:
Wentao Ye
2025-10-28 16:51:35 -04:00
committed by GitHub
parent 141e6a0505
commit 6afc28a9ba
2 changed files with 230 additions and 226 deletions

View File

@@ -753,13 +753,13 @@ def override_envs_for_invariance():
curr_attn_backend = envs.VLLM_ATTENTION_BACKEND
supported_backends = [
"FLASH_ATTN", # best supported backend
"FLEX_ATTENTION",
"FLASHINFER",
"FLASH_ATTN_MLA",
"FLASHINFER_MLA",
"TRITON_MLA",
# Not yet supported backends
# "FLASHMLA",
# "FLEX_ATTENTION", # IMA issue even if we disable batch invariance
]
if curr_attn_backend not in supported_backends:
warning = (