Support block size 32 (#35)
This commit is contained in:
@@ -654,6 +654,16 @@ void single_query_cached_kv_attention(
|
||||
block_tables,
|
||||
context_lens,
|
||||
max_context_len);
|
||||
} else if (block_size == 32) {
|
||||
single_query_cached_kv_attention_launcher<uint16_t, 32>(
|
||||
out,
|
||||
query,
|
||||
key_cache,
|
||||
value_cache,
|
||||
scale,
|
||||
block_tables,
|
||||
context_lens,
|
||||
max_context_len);
|
||||
} else {
|
||||
assert(false);
|
||||
}
|
||||
@@ -679,6 +689,16 @@ void single_query_cached_kv_attention(
|
||||
block_tables,
|
||||
context_lens,
|
||||
max_context_len);
|
||||
} else if (block_size == 32) {
|
||||
single_query_cached_kv_attention_launcher<float, 32>(
|
||||
out,
|
||||
query,
|
||||
key_cache,
|
||||
value_cache,
|
||||
scale,
|
||||
block_tables,
|
||||
context_lens,
|
||||
max_context_len);
|
||||
} else {
|
||||
assert(false);
|
||||
}
|
||||
@@ -834,6 +854,18 @@ void multi_query_cached_kv_attention(
|
||||
block_tables,
|
||||
context_lens,
|
||||
max_context_len);
|
||||
} else if (block_size == 32) {
|
||||
multi_query_cached_kv_attention_launcher<uint16_t, 32>(
|
||||
cu_query_lens,
|
||||
seq_prompt_mapping,
|
||||
out,
|
||||
query,
|
||||
key_cache,
|
||||
value_cache,
|
||||
scale,
|
||||
block_tables,
|
||||
context_lens,
|
||||
max_context_len);
|
||||
} else {
|
||||
assert(false);
|
||||
}
|
||||
@@ -863,6 +895,18 @@ void multi_query_cached_kv_attention(
|
||||
block_tables,
|
||||
context_lens,
|
||||
max_context_len);
|
||||
} else if (block_size == 32) {
|
||||
multi_query_cached_kv_attention_launcher<float, 32>(
|
||||
cu_query_lens,
|
||||
seq_prompt_mapping,
|
||||
out,
|
||||
query,
|
||||
key_cache,
|
||||
value_cache,
|
||||
scale,
|
||||
block_tables,
|
||||
context_lens,
|
||||
max_context_len);
|
||||
} else {
|
||||
assert(false);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user