[ROCm][Perf] Allow MTP lens > 1 in Sparse MLA (#36681)
Signed-off-by: Teemu Virolainen <teemu.virolainen@amd.com>
This commit is contained in:
@@ -214,11 +214,15 @@ class SpecDecodeBaseProposer:
|
|||||||
# Determine allowed attention backends once during initialization.
|
# Determine allowed attention backends once during initialization.
|
||||||
self.allowed_attn_types: tuple | None = None
|
self.allowed_attn_types: tuple | None = None
|
||||||
if current_platform.is_rocm():
|
if current_platform.is_rocm():
|
||||||
|
from vllm.v1.attention.backends.mla.rocm_aiter_mla_sparse import (
|
||||||
|
ROCMAiterMLASparseMetadata,
|
||||||
|
)
|
||||||
from vllm.v1.attention.backends.rocm_attn import RocmAttentionMetadata
|
from vllm.v1.attention.backends.rocm_attn import RocmAttentionMetadata
|
||||||
|
|
||||||
rocm_types = [
|
rocm_types = [
|
||||||
TritonAttentionMetadata,
|
TritonAttentionMetadata,
|
||||||
RocmAttentionMetadata,
|
RocmAttentionMetadata,
|
||||||
|
ROCMAiterMLASparseMetadata,
|
||||||
]
|
]
|
||||||
# ROCM_AITER_FA is an optional backend
|
# ROCM_AITER_FA is an optional backend
|
||||||
# We check is_enabled() here to avoid importing the backend module during
|
# We check is_enabled() here to avoid importing the backend module during
|
||||||
|
|||||||
Reference in New Issue
Block a user