[ROCm][Bugfix] fix cache block size mismatch for aiter unified attention (#37606)

Signed-off-by: Divakar Verma <divakar.verma@amd.com>
This commit is contained in:
Divakar Verma
2026-03-19 20:00:08 -04:00
committed by GitHub
parent be12afd284
commit 4ca3fa6bb4
2 changed files with 7 additions and 24 deletions

View File

@@ -29,6 +29,13 @@ class RocmAiterUnifiedAttentionBackend(RocmAttentionBackend):
def get_supported_kernel_block_sizes() -> list[int | MultipleOf]:
return [MultipleOf(16)]
@classmethod
def get_preferred_block_size(cls, default_block_size: int) -> int:
    """Return the KV-cache block size this backend prefers.

    The caller-supplied ``default_block_size`` is not consulted: the
    method always answers 64 and reports that choice through the
    module logger's ``warning_once`` helper.

    Args:
        default_block_size: Block size the caller would otherwise use.
            Accepted for interface compatibility and ignored here.

    Returns:
        The constant 64.
    """
    # NOTE(review): ``warning_once`` presumably de-duplicates repeated
    # messages; confirm against the logger implementation.
    notice = "[ROCM_AITER_UNIFIED_ATTN]: Setting kv cache block size to 64."
    logger.warning_once(notice)
    return 64
@classmethod
def supports_block_size(cls, block_size: int | None) -> bool:
if block_size is None: