Add XPU MLA Sparse backend for DeepSeek v3.2 (#33230)

Signed-off-by: Zhang, Wuxun <wuxun.zhang@intel.com>
This commit is contained in:
Wuxun Zhang
2026-03-11 19:19:15 +08:00
committed by GitHub
parent 40c0461f24
commit e584dce52b
9 changed files with 940 additions and 24 deletions

View File

@@ -57,6 +57,7 @@ class AttentionBackendEnum(Enum, metaclass=_AttentionBackendEnumMeta):
ROCM_AITER_MLA_SPARSE = (
"vllm.v1.attention.backends.mla.rocm_aiter_mla_sparse.ROCMAiterMLASparseBackend"
)
XPU_MLA_SPARSE = "vllm.v1.attention.backends.mla.xpu_mla_sparse.XPUMLASparseBackend"
TORCH_SDPA = "" # this tag is only used for ViT
FLASHINFER = "vllm.v1.attention.backends.flashinfer.FlashInferBackend"
FLASHINFER_MLA = (