Support block size 32 (#35)

This commit is contained in:
Woosuk Kwon
2023-04-09 23:07:18 -07:00
committed by GitHub
parent ee88a7e5f3
commit b9926f7f66
4 changed files with 49 additions and 5 deletions

View File

@@ -654,6 +654,16 @@ void single_query_cached_kv_attention(
block_tables,
context_lens,
max_context_len);
} else if (block_size == 32) {
single_query_cached_kv_attention_launcher<uint16_t, 32>(
out,
query,
key_cache,
value_cache,
scale,
block_tables,
context_lens,
max_context_len);
} else {
assert(false);
}
@@ -679,6 +689,16 @@ void single_query_cached_kv_attention(
block_tables,
context_lens,
max_context_len);
} else if (block_size == 32) {
single_query_cached_kv_attention_launcher<float, 32>(
out,
query,
key_cache,
value_cache,
scale,
block_tables,
context_lens,
max_context_len);
} else {
assert(false);
}
@@ -834,6 +854,18 @@ void multi_query_cached_kv_attention(
block_tables,
context_lens,
max_context_len);
} else if (block_size == 32) {
multi_query_cached_kv_attention_launcher<uint16_t, 32>(
cu_query_lens,
seq_prompt_mapping,
out,
query,
key_cache,
value_cache,
scale,
block_tables,
context_lens,
max_context_len);
} else {
assert(false);
}
@@ -863,6 +895,18 @@ void multi_query_cached_kv_attention(
block_tables,
context_lens,
max_context_len);
} else if (block_size == 32) {
multi_query_cached_kv_attention_launcher<float, 32>(
cu_query_lens,
seq_prompt_mapping,
out,
query,
key_cache,
value_cache,
scale,
block_tables,
context_lens,
max_context_len);
} else {
assert(false);
}