[ROCM][KERNEL] Paged attention for V1 (#15720)

Signed-off-by: Aleksandr Malyshev <maleksan@amd.com>
Signed-off-by: root <root@banff-cyxtera-s65-4.amd.com>
Co-authored-by: Aleksandr Malyshev <maleksan@amd.com>
Co-authored-by: root <root@banff-cyxtera-s65-4.amd.com>
This commit is contained in:
Aleksandr Malyshev
2025-04-02 19:48:00 -07:00
committed by GitHub
parent bd7599d34a
commit e73ff24e31
11 changed files with 219 additions and 109 deletions

View File

@@ -168,6 +168,7 @@ class TritonAttentionImpl(AttentionImpl):
block_table=attn_metadata.block_table,
query_start_loc=attn_metadata.query_start_loc,
seq_lens=attn_metadata.seq_lens,
max_seq_len=attn_metadata.max_seq_len,
max_query_len=attn_metadata.max_query_len,
k_scale=layer._k_scale,
v_scale=layer._v_scale,