[ROCm][Perf] Allow MTP lengths > 1 in Sparse MLA (#36681)

Signed-off-by: Teemu Virolainen <teemu.virolainen@amd.com>
This commit is contained in:
tvirolai-amd
2026-03-11 16:43:03 +02:00
committed by GitHub
parent f3163bba67
commit a9e532afe2

View File

@@ -214,11 +214,15 @@ class SpecDecodeBaseProposer:
# Determine allowed attention backends once during initialization.
self.allowed_attn_types: tuple | None = None
if current_platform.is_rocm():
from vllm.v1.attention.backends.mla.rocm_aiter_mla_sparse import (
ROCMAiterMLASparseMetadata,
)
from vllm.v1.attention.backends.rocm_attn import RocmAttentionMetadata
rocm_types = [
TritonAttentionMetadata,
RocmAttentionMetadata,
ROCMAiterMLASparseMetadata,
]
# ROCM_AITER_FA is an optional backend
# We check is_enabled() here to avoid importing the backend module during