Add XPU MLA Sparse backend for DeepSeek v3.2 (#33230)
Signed-off-by: Zhang, Wuxun <wuxun.zhang@intel.com>
This commit is contained in:
@@ -57,6 +57,7 @@ class AttentionBackendEnum(Enum, metaclass=_AttentionBackendEnumMeta):
|
||||
ROCM_AITER_MLA_SPARSE = (
|
||||
"vllm.v1.attention.backends.mla.rocm_aiter_mla_sparse.ROCMAiterMLASparseBackend"
|
||||
)
|
||||
XPU_MLA_SPARSE = "vllm.v1.attention.backends.mla.xpu_mla_sparse.XPUMLASparseBackend"
|
||||
TORCH_SDPA = "" # this tag is only used for ViT
|
||||
FLASHINFER = "vllm.v1.attention.backends.flashinfer.FlashInferBackend"
|
||||
FLASHINFER_MLA = (
|
||||
|
||||
Reference in New Issue
Block a user