[ROCm][Kernel][V1] Enable AMD Radeon GPU Custom Paged Attention on v1 (#17004)

Signed-off-by: Hosang Yoon <hosang.yoon@amd.com>
This commit is contained in:
Hosang
2025-05-21 11:35:00 -04:00
committed by GitHub
parent 2b16104557
commit dd5fa7e04f
6 changed files with 1769 additions and 28 deletions

View File

@@ -84,7 +84,10 @@ def main(
if version == "v2":
if current_platform.is_rocm():
global PARTITION_SIZE
PARTITION_SIZE = 1024 if not args.custom_paged_attn else PARTITION_SIZE_ROCM
if not args.custom_paged_attn and not current_platform.is_navi():
PARTITION_SIZE = 1024
else:
PARTITION_SIZE = PARTITION_SIZE_ROCM
num_partitions = (max_seq_len + PARTITION_SIZE - 1) // PARTITION_SIZE
tmp_output = torch.empty(
size=(num_seqs, num_query_heads, num_partitions, head_size),
@@ -159,6 +162,7 @@ def main(
scale,
block_tables,
seq_lens,
None,
block_size,
max_seq_len,
alibi_slopes,