[ROCm][Bugfix] fix cache block size mismatch for aiter unified attention (#37606)
Signed-off-by: Divakar Verma <divakar.verma@amd.com>
This commit is contained in:
@@ -29,6 +29,13 @@ class RocmAiterUnifiedAttentionBackend(RocmAttentionBackend):
|
||||
def get_supported_kernel_block_sizes() -> list[int | MultipleOf]:
|
||||
return [MultipleOf(16)]
|
||||
|
||||
@classmethod
def get_preferred_block_size(cls, default_block_size: int) -> int:
    """Return the KV cache block size preferred by this backend.

    The result is always 64; ``default_block_size`` is accepted but not
    consulted. A warning announcing the chosen size is logged through
    ``logger.warning_once`` on every call (presumably de-duplicated by
    the logger itself -- confirm against the logger implementation).

    Args:
        default_block_size: Block size the caller would otherwise use.
            Unused by this implementation.

    Returns:
        The fixed block size, 64.
    """
    preferred_block_size = 64
    notice = "[ROCM_AITER_UNIFIED_ATTN]: Setting kv cache block size to 64."
    logger.warning_once(notice)
    return preferred_block_size
|
||||
|
||||
@classmethod
|
||||
def supports_block_size(cls, block_size: int | None) -> bool:
|
||||
if block_size is None:
|
||||
|
||||
Reference in New Issue
Block a user