Add FLASHINFER_MLA to test_mla_backends and add B200 CI run (#27663)

Signed-off-by: Matthew Bonanni <mbonanni@redhat.com>
2025-10-31 14:12:19 -04:00
parent 5e8862e9e0
commit f29aeb5a25
4 changed files with 208 additions and 64 deletions
--- a/tests/v1/attention/utils.py
+++ b/tests/v1/attention/utils.py
@@ -285,7 +285,17 @@ full_cg_backend_configs = {
        name="CutlassMLA",
        env_vars={
            "VLLM_ATTENTION_BACKEND": "CUTLASS_MLA",
-            "FORCE_NUM_KV_SPLITS": "1",  # TODO: remove this when hang issue is fixed
+        },
+        comp_config={
+            "cudagraph_mode": "FULL_AND_PIECEWISE",
+        },
+        specific_gpu_arch=(10, 0),
+    ),
+    # FlashInfer MLA on Blackwell
+    "FlashInferMLA": BackendConfig(
+        name="FlashInferMLA",
+        env_vars={
+            "VLLM_ATTENTION_BACKEND": "FLASHINFER_MLA",
        },
        comp_config={
            "cudagraph_mode": "FULL_AND_PIECEWISE",