From 107cf8e92f88678712e3aa207f054243e56df330 Mon Sep 17 00:00:00 2001
From: Rabi Mishra
Date: Thu, 8 Jan 2026 13:16:07 +0530
Subject: [PATCH] fix(rocm): Add get_supported_kernel_block_sizes() to ROCM_ATTN (#31712)

Signed-off-by: rabi
---
 vllm/v1/attention/backends/rocm_attn.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/vllm/v1/attention/backends/rocm_attn.py b/vllm/v1/attention/backends/rocm_attn.py
index b9b32d07b..44fa2962a 100644
--- a/vllm/v1/attention/backends/rocm_attn.py
+++ b/vllm/v1/attention/backends/rocm_attn.py
@@ -11,6 +11,7 @@ from vllm.attention.backends.abstract import (
     AttentionBackend,
     AttentionImpl,
     AttentionType,
+    MultipleOf,
 )
 from vllm.attention.ops.chunked_prefill_paged_decode import chunked_prefill_paged_decode
 from vllm.attention.ops.paged_attn import PagedAttention
@@ -158,6 +159,13 @@ class RocmAttentionBackend(AttentionBackend):
         torch.float32,
     ]
 
+    @staticmethod
+    def get_supported_kernel_block_sizes() -> list[int | MultipleOf]:
+        # ROCM paged attention kernel only supports block sizes 16 and 32
+        # due to shared memory (LDS) constraints on AMD GPUs.
+        # See csrc/rocm/attention.cu CALL_CUSTOM_LAUNCHER_BLK macro.
+        return [16, 32]
+
     @classmethod
     def get_supported_head_sizes(cls) -> list[int]:
         return [32, 64, 96, 128, 160, 192, 224, 256]