[ROCm][Kernel][V1] Enable AMD Radeon GPU Custom Paged Attention on v1 (#17004)
Signed-off-by: Hosang Yoon <hosang.yoon@amd.com>
This commit is contained in:
@@ -84,7 +84,10 @@ def main(
|
||||
if version == "v2":
|
||||
if current_platform.is_rocm():
|
||||
global PARTITION_SIZE
|
||||
PARTITION_SIZE = 1024 if not args.custom_paged_attn else PARTITION_SIZE_ROCM
|
||||
if not args.custom_paged_attn and not current_platform.is_navi():
|
||||
PARTITION_SIZE = 1024
|
||||
else:
|
||||
PARTITION_SIZE = PARTITION_SIZE_ROCM
|
||||
num_partitions = (max_seq_len + PARTITION_SIZE - 1) // PARTITION_SIZE
|
||||
tmp_output = torch.empty(
|
||||
size=(num_seqs, num_query_heads, num_partitions, head_size),
|
||||
@@ -159,6 +162,7 @@ def main(
|
||||
scale,
|
||||
block_tables,
|
||||
seq_lens,
|
||||
None,
|
||||
block_size,
|
||||
max_seq_len,
|
||||
alibi_slopes,
|
||||
|
||||
Reference in New Issue
Block a user