Add FLASHINFER_MLA to test_mla_backends and add B200 CI run (#27663)

Signed-off-by: Matthew Bonanni <mbonanni@redhat.com>
This commit is contained in:
Matthew Bonanni
2025-10-31 14:12:19 -04:00
committed by GitHub
parent 5e8862e9e0
commit f29aeb5a25
4 changed files with 208 additions and 64 deletions

View File

@@ -285,7 +285,17 @@ full_cg_backend_configs = {
name="CutlassMLA",
env_vars={
"VLLM_ATTENTION_BACKEND": "CUTLASS_MLA",
"FORCE_NUM_KV_SPLITS": "1", # TODO: remove this when hang issue is fixed
},
comp_config={
"cudagraph_mode": "FULL_AND_PIECEWISE",
},
specific_gpu_arch=(10, 0),
),
# FlashInfer MLA on Blackwell
"FlashInferMLA": BackendConfig(
name="FlashInferMLA",
env_vars={
"VLLM_ATTENTION_BACKEND": "FLASHINFER_MLA",
},
comp_config={
"cudagraph_mode": "FULL_AND_PIECEWISE",