[ROCM][KERNEL] Paged attention for V1 (#15720)
Signed-off-by: Aleksandr Malyshev <maleksan@amd.com>
Signed-off-by: root <root@banff-cyxtera-s65-4.amd.com>
Co-authored-by: Aleksandr Malyshev <maleksan@amd.com>
Co-authored-by: root <root@banff-cyxtera-s65-4.amd.com>
This commit is contained in:
committed by
GitHub
parent
bd7599d34a
commit
e73ff24e31
@@ -164,6 +164,7 @@ def test_contexted_kv_attention(
|
||||
block_table,
|
||||
b_start_loc,
|
||||
b_seq_len,
|
||||
MAX_CTX_LEN,
|
||||
max_input_len,
|
||||
k_scale,
|
||||
v_scale,
|
||||
@@ -180,6 +181,7 @@ def test_contexted_kv_attention(
|
||||
block_table,
|
||||
b_start_loc,
|
||||
b_seq_len,
|
||||
MAX_CTX_LEN,
|
||||
max_input_len,
|
||||
k_scale,
|
||||
v_scale,
|
||||
@@ -397,6 +399,7 @@ def test_contexted_kv_attention_alibi(
|
||||
block_table,
|
||||
b_start_loc,
|
||||
b_seq_len,
|
||||
MAX_CTX_LEN,
|
||||
max_input_len,
|
||||
k_scale,
|
||||
v_scale,
|
||||
@@ -413,6 +416,7 @@ def test_contexted_kv_attention_alibi(
|
||||
block_table,
|
||||
b_start_loc,
|
||||
b_seq_len,
|
||||
MAX_CTX_LEN,
|
||||
max_input_len,
|
||||
k_scale,
|
||||
v_scale,
|
||||
|
||||
Reference in New Issue
Block a user