[Attention] Add ROCM_AITER_MLA_SPARSE to attention backend registry (#29103)
Signed-off-by: Matthew Bonanni <mbonanni@redhat.com>
This commit is contained in:
@@ -52,6 +52,9 @@ class AttentionBackendEnum(Enum, metaclass=_AttentionBackendEnumMeta):
     ROCM_AITER_FA = (
         "vllm.v1.attention.backends.rocm_aiter_fa.AiterFlashAttentionBackend"
     )
+    ROCM_AITER_MLA_SPARSE = (
+        "vllm.v1.attention.backends.mla.rocm_aiter_mla_sparse.ROCMAiterMLASparseBackend"
+    )
     TORCH_SDPA = "" # this tag is only used for ViT
     FLASHINFER = "vllm.v1.attention.backends.flashinfer.FlashInferBackend"
     FLASHINFER_MLA = (
@@ -233,10 +233,7 @@ class RocmPlatform(Platform):
                 "Sparse MLA backend on ROCm only supports block size 1 for now."
             )
             logger.info_once("Using Sparse MLA backend on V1 engine.")
-            return (
-                "vllm.v1.attention.backends.mla.rocm_aiter_mla_sparse."
-                "ROCMAiterMLASparseBackend"
-            )
+            return AttentionBackendEnum.ROCM_AITER_MLA_SPARSE.get_path()
 
         if use_mla:
             if selected_backend is None:
Reference in New Issue
Block a user