[ROCm][CI] Fix cross-attention dispatch for encoder-decoder models (#38450)

Signed-off-by: Andreas Karatzas <akaratza@amd.com>
Authored by Andreas Karatzas, committed by GitHub on 2026-03-29 00:08:03 -05:00
parent 5b8c30d62b
commit 43cc5138e5
6 changed files with 90 additions and 19 deletions


@@ -758,11 +758,12 @@ class AiterFlashAttentionBackend(AttentionBackend):
     @classmethod
     def supports_attn_type(cls, attn_type: str) -> bool:
-        """ROCM AITER FA supports decoder and encoder-decoder (cross) attention."""
-        return attn_type in (
-            AttentionType.DECODER,
-            AttentionType.ENCODER_DECODER,
-        )
+        """ENCODER_DECODER is not supported because the prefill path uses
+        flash_attn_varlen_func with cu_seqlens_k set to decoder
+        query_start_loc (not encoder seq lens) and causal=True, both of
+        which are incorrect for cross-attention layers.
+        """
+        return attn_type in (AttentionType.DECODER,)
     @staticmethod
     def get_supported_kernel_block_sizes() -> list[int | MultipleOf]:
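For reference, the difference between the two call shapes the new docstring describes is sketched below. This is a minimal illustration, not vLLM's code: it uses the upstream flash_attn package rather than the AITER wrapper, and the tensor sizes and variable names are made up for the example. The point is that in cross-attention the key/value side indexes encoder tokens, so cu_seqlens_k must be the prefix sum of encoder sequence lengths and the mask must be non-causal.

    import torch
    from flash_attn import flash_attn_varlen_func

    # Two decoder sequences (3 and 4 tokens) cross-attending to two
    # encoder sequences (10 and 15 tokens). All sizes are illustrative.
    cu_seqlens_q = torch.tensor([0, 3, 7], dtype=torch.int32, device="cuda")    # decoder query_start_loc
    cu_seqlens_k = torch.tensor([0, 10, 25], dtype=torch.int32, device="cuda")  # encoder lens, prefix-summed

    q = torch.randn(7, 8, 64, dtype=torch.float16, device="cuda")   # (total_q, nheads, head_dim)
    k = torch.randn(25, 8, 64, dtype=torch.float16, device="cuda")  # (total_k, nheads, head_dim)
    v = torch.randn(25, 8, 64, dtype=torch.float16, device="cuda")

    # Correct cross-attention call shape: keys/values are encoder tokens,
    # so cu_seqlens_k is built from encoder lengths and masking is non-causal.
    out = flash_attn_varlen_func(
        q, k, v,
        cu_seqlens_q=cu_seqlens_q,
        cu_seqlens_k=cu_seqlens_k,
        max_seqlen_q=4,
        max_seqlen_k=15,
        causal=False,
    )

    # The bug described in the docstring is equivalent to passing
    # cu_seqlens_k=cu_seqlens_q and causal=True: the kernel would then
    # slice the 25 encoder tokens with decoder offsets and mask most of
    # them out, silently producing wrong cross-attention output.

With ENCODER_DECODER removed from supports_attn_type, backend selection presumably falls back to an attention implementation that handles cross-attention correctly, which matches the commit title's framing of this as a dispatch fix rather than a kernel fix.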