From a9e532afe2a1ae65c917ae977bf9090806e14721 Mon Sep 17 00:00:00 2001
From: tvirolai-amd
Date: Wed, 11 Mar 2026 16:43:03 +0200
Subject: [PATCH] [ROCm][Perf] Allow MTP lens > 1 in Sparse MLA (#36681)

Signed-off-by: Teemu Virolainen
---
 vllm/v1/spec_decode/eagle.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/vllm/v1/spec_decode/eagle.py b/vllm/v1/spec_decode/eagle.py
index a5554d99f..b985176dc 100644
--- a/vllm/v1/spec_decode/eagle.py
+++ b/vllm/v1/spec_decode/eagle.py
@@ -214,11 +214,15 @@ class SpecDecodeBaseProposer:
         # Determine allowed attention backends once during initialization.
         self.allowed_attn_types: tuple | None = None
         if current_platform.is_rocm():
+            from vllm.v1.attention.backends.mla.rocm_aiter_mla_sparse import (
+                ROCMAiterMLASparseMetadata,
+            )
             from vllm.v1.attention.backends.rocm_attn import RocmAttentionMetadata
 
             rocm_types = [
                 TritonAttentionMetadata,
                 RocmAttentionMetadata,
+                ROCMAiterMLASparseMetadata,
             ]
             # ROCM_AITER_FA is an optional backend
             # We check is_enabled() here to avoid importing the backend module during