[NVIDIA] Add Cutlass MLA backend (#17625)

This commit is contained in:
Kaixi Hou
2025-06-04 12:40:26 +08:00
committed by GitHub
parent 8d646c2e53
commit 41aa578428
7 changed files with 111 additions and 3 deletions

View File

@@ -1395,6 +1395,7 @@ class EngineArgs:
"PALLAS_VLLM_V1",
"TRITON_ATTN_VLLM_V1",
"TRITON_MLA",
"CUTLASS_MLA_VLLM_V1",
"FLASHMLA",
"FLASHINFER",
"FLASHINFER_VLLM_V1",