[ROCM] Enable aiter attn backend for qwen3-next model (#32492)

Signed-off-by: jennyyyyzhen <yzhen@hmc.edu>
Author: jennyyyyzhen
Date: 2026-01-31 01:03:57 -08:00
Committed by: GitHub
Parent: f68e3ea4e1
Commit: 527bcd14d4
2 changed files with 2 additions and 2 deletions


@@ -168,7 +168,7 @@ Priority is **1 = highest** (tried first).
| `FLASH_ATTN` | FA3* | fp16, bf16 | `auto`, `bfloat16`, `fp8`, `fp8_e4m3`, `fp8_e5m2` | %16 | Any | ✅ | ❌ | All | 9.x |
| `FLASH_ATTN_DIFFKV` | | fp16, bf16 | `auto` | Any | Any | ❌ | ❌ | Decoder | Any |
| `FLEX_ATTENTION` | | fp16, bf16, fp32 | `auto`, `bfloat16` | Any | Any | ❌ | ✅ | Decoder, Encoder Only | Any |
-| `ROCM_AITER_FA` | | fp16, bf16 | `auto` | %16 | 64, 128, 256 | ❌ | ❌ | Decoder | N/A |
+| `ROCM_AITER_FA` | | fp16, bf16 | `auto` | 16, 32 | 64, 128, 256 | ❌ | ❌ | Decoder | N/A |
| `ROCM_AITER_UNIFIED_ATTN` | | fp16, bf16 | `auto` | Any | Any | ❌ | ❌ | Decoder | N/A |
| `ROCM_ATTN` | | fp16, bf16, fp32 | `auto` | 16, 32, 544 | 32, 64, 96, 128, 160, 192, 224, 256 | ❌ | ❌ | Decoder | N/A |
| `TREE_ATTN` | | fp16, bf16 | `auto` | %16 | 32, 64, 96, 128, 160, 192, 224, 256 | ❌ | ❌ | Decoder | Any |

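For reference, `%16` in the block-size column of this table means any kernel block size that is a multiple of 16, while an explicit list such as `16, 32` restricts the backend to exactly those sizes; the code hunk below makes the same change at the API level. Here is a minimal sketch of how such a constraint list can be checked. The `MultipleOf` class is a simplified stand-in for vLLM's marker type, and `block_size_supported` is a hypothetical helper, not vLLM's actual backend-selection code.

```python
from dataclasses import dataclass


@dataclass(frozen=True)
class MultipleOf:
    """Simplified stand-in for vLLM's MultipleOf marker: matches any positive multiple of `base`."""
    base: int


def block_size_supported(block_size: int, supported: list[int | MultipleOf]) -> bool:
    """Hypothetical check: does `block_size` satisfy any entry in `supported`?"""
    for entry in supported:
        if isinstance(entry, MultipleOf):
            if block_size > 0 and block_size % entry.base == 0:
                return True
        elif block_size == entry:
            return True
    return False


# `%16` in the table corresponds to [MultipleOf(16)] in code: 16, 32, 48, ... all pass.
assert block_size_supported(48, [MultipleOf(16)])
# The updated ROCM_AITER_FA row is the explicit list [16, 32]: only those two pass.
assert block_size_supported(32, [16, 32])
assert not block_size_supported(48, [16, 32])
```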

@@ -683,7 +683,7 @@ class AiterFlashAttentionBackend(AttentionBackend):
    @staticmethod
    def get_supported_kernel_block_sizes() -> list[int | MultipleOf]:
-        return [MultipleOf(16)]
+        return [16, 32]

    @classmethod
    def get_supported_head_sizes(cls) -> list[int]:
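In effect, the change above narrows the advertised kernel block sizes from every multiple of 16 down to the two explicit values 16 and 32, presumably the sizes the ROCm AITER flash-attention path is exercised with; per the commit title, this is what enables the AITER attention backend for the qwen3-next model. The sketch after the documentation table above shows how a mixed list of `int | MultipleOf` entries can be evaluated.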