[ROCm][Kernel][V1] Enable AMD Radeon GPU Custom Paged Attention on v1 (#17004)

Signed-off-by: Hosang Yoon <hosang.yoon@amd.com>
Hosang authored on 2025-05-21 11:35:00 -04:00, committed by GitHub
parent 2b16104557
commit dd5fa7e04f
6 changed files with 1769 additions and 28 deletions


@@ -148,6 +148,11 @@ def test_paged_attention(
             or (version == "rocm" and head_size not in (64, 128))):
         pytest.skip()
+    if (version == "rocm" and current_platform.is_navi()
+            and (kv_cache_dtype == "fp8" or head_size != 128
+                 or block_size != 16 or use_alibi)):
+        pytest.skip()
     global PARTITION_SIZE
 
     current_platform.seed_everything(seed)
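
Note: the guard added above skips configurations that the Navi (RDNA) build of the ROCm custom paged-attention kernel does not cover. As a minimal sketch, the supported-configuration predicate the test inlines could be factored out as follows (the helper name is hypothetical; the test itself just calls pytest.skip()):

def navi_custom_paged_attention_supported(kv_cache_dtype: str, head_size: int,
                                          block_size: int,
                                          use_alibi: bool) -> bool:
    # Mirrors the skip condition: on Navi the custom kernel is exercised only
    # for a non-fp8 KV cache, head_size 128, block_size 16, and no ALiBi.
    return (kv_cache_dtype != "fp8" and head_size == 128
            and block_size == 16 and not use_alibi)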
@@ -275,6 +280,7 @@ def test_paged_attention(
             scale,
             block_tables,
             seq_lens,
+            None,
             block_size,
             max_seq_len,
             alibi_slopes,
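
Note: the added None fills a new positional argument that this change inserts after seq_lens in ops.paged_attention_rocm. Given the V1 focus of the commit, it presumably carries optional per-request query start offsets; passing None preserves the single-query-token decode layout this test exercises. A sketch of the updated call shape (the argument name query_start_loc is an assumption, not confirmed by the hunk):

ops.paged_attention_rocm(
    output, exp_sums, max_logits, tmp_output, query,
    key_cache, value_cache, num_kv_heads, scale, block_tables,
    seq_lens,
    None,  # query_start_loc (assumed name); None = one query token per sequence
    block_size, max_seq_len, alibi_slopes,
    kv_cache_dtype, k_scale, v_scale)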
@@ -286,7 +292,7 @@ def test_paged_attention(
         opcheck(torch.ops._rocm_C.paged_attention,
                 (output, exp_sums, max_logits, tmp_output, query,
                  key_cache, value_cache, num_kv_heads, scale, block_tables,
-                 seq_lens, block_size, max_seq_len, alibi_slopes,
+                 seq_lens, None, block_size, max_seq_len, alibi_slopes,
                  kv_cache_dtype, k_scale, v_scale),
                 cond=(head_size == HEAD_SIZES[0]
                       and block_size == BLOCK_SIZES[0]))
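
Note: opcheck is the kernel test suite's wrapper around torch.library.opcheck, and the cond gate restricts the (slow) registration checks to one representative head_size/block_size combination. A minimal sketch of the underlying pattern, assuming op is a registered custom operator (wrapper name checked_call is hypothetical):

import torch

def checked_call(op, args, cond: bool = True):
    # torch.library.opcheck validates the op's schema and its fake-tensor /
    # autograd registrations by comparing against an eager call with `args`.
    if cond:
        torch.library.opcheck(op, args)
    return op(*args)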