[ROCm][Kernel][V1] Enable AMD Radeon GPU Custom Paged Attention on v1 (#17004)

Signed-off-by: Hosang Yoon <hosang.yoon@amd.com>
Hosang authored on 2025-05-21 11:35:00 -04:00, committed by GitHub
parent 2b16104557
commit dd5fa7e04f
6 changed files with 1769 additions and 28 deletions


@@ -148,6 +148,11 @@ def test_paged_attention(
             or (version == "rocm" and head_size not in (64, 128))):
         pytest.skip()
+    if (version == "rocm" and current_platform.is_navi()
+            and (kv_cache_dtype == "fp8" or head_size != 128
+                 or block_size != 16 or use_alibi)):
+        pytest.skip()
     global PARTITION_SIZE
 
     current_platform.seed_everything(seed)
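
Note: the guard added above skips configurations that the Navi (RDNA) build of the ROCm custom paged-attention kernel does not cover. As a minimal sketch, the supported-configuration predicate the test inlines could be factored out as follows (the helper name is hypothetical; the test itself just calls pytest.skip()):

def navi_custom_paged_attention_supported(kv_cache_dtype: str, head_size: int,
                                          block_size: int,
                                          use_alibi: bool) -> bool:
    # Mirrors the skip condition: on Navi the custom kernel is exercised only
    # for a non-fp8 KV cache, head_size 128, block_size 16, and no ALiBi.
    return (kv_cache_dtype != "fp8" and head_size == 128
            and block_size == 16 and not use_alibi)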
@@ -275,6 +280,7 @@ def test_paged_attention(
             scale,
             block_tables,
             seq_lens,
+            None,
             block_size,
             max_seq_len,
             alibi_slopes,
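
Note: the added None fills a new positional argument that this change inserts after seq_lens in ops.paged_attention_rocm. Given the V1 focus of the commit, it presumably carries optional per-request query start offsets; passing None preserves the single-query-token decode layout this test exercises. A sketch of the updated call shape (the argument name query_start_loc is an assumption, not confirmed by the hunk):

ops.paged_attention_rocm(
    output, exp_sums, max_logits, tmp_output, query,
    key_cache, value_cache, num_kv_heads, scale, block_tables,
    seq_lens,
    None,  # query_start_loc (assumed name); None = one query token per sequence
    block_size, max_seq_len, alibi_slopes,
    kv_cache_dtype, k_scale, v_scale)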
@@ -286,7 +292,7 @@ def test_paged_attention(
         opcheck(torch.ops._rocm_C.paged_attention,
                 (output, exp_sums, max_logits, tmp_output, query,
                  key_cache, value_cache, num_kv_heads, scale, block_tables,
-                 seq_lens, block_size, max_seq_len, alibi_slopes,
+                 seq_lens, None, block_size, max_seq_len, alibi_slopes,
                  kv_cache_dtype, k_scale, v_scale),
                 cond=(head_size == HEAD_SIZES[0]
                       and block_size == BLOCK_SIZES[0]))
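
Note: opcheck is the kernel test suite's wrapper around torch.library.opcheck, and the cond gate restricts the (slow) registration checks to one representative head_size/block_size combination. A minimal sketch of the underlying pattern, assuming op is a registered custom operator (wrapper name checked_call is hypothetical):

import torch

def checked_call(op, args, cond: bool = True):
    # torch.library.opcheck validates the op's schema and its fake-tensor /
    # autograd registrations by comparing against an eager call with `args`.
    if cond:
        torch.library.opcheck(op, args)
    return op(*args)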