[ROCm][Perf] Allow MTP lens > 1 in Sparse MLA (#36681)
Signed-off-by: Teemu Virolainen <teemu.virolainen@amd.com>
This commit is contained in:
@@ -214,11 +214,15 @@ class SpecDecodeBaseProposer:
|
||||
# Determine allowed attention backends once during initialization.
|
||||
self.allowed_attn_types: tuple | None = None
|
||||
if current_platform.is_rocm():
|
||||
from vllm.v1.attention.backends.mla.rocm_aiter_mla_sparse import (
|
||||
ROCMAiterMLASparseMetadata,
|
||||
)
|
||||
from vllm.v1.attention.backends.rocm_attn import RocmAttentionMetadata
|
||||
|
||||
rocm_types = [
|
||||
TritonAttentionMetadata,
|
||||
RocmAttentionMetadata,
|
||||
ROCMAiterMLASparseMetadata,
|
||||
]
|
||||
# ROCM_AITER_FA is an optional backend
|
||||
# We check is_enabled() here to avoid importing the backend module during
|
||||
|
||||
Reference in New Issue
Block a user