Support block size 32 (#35)

2023-04-09 23:07:18 -07:00
parent ee88a7e5f3
commit b9926f7f66
4 changed files with 49 additions and 5 deletions
--- a/csrc/attention_kernels.cu
+++ b/csrc/attention_kernels.cu
@@ -654,6 +654,16 @@ void single_query_cached_kv_attention(
        block_tables,
        context_lens,
        max_context_len);
+    } else if (block_size == 32) {
+      single_query_cached_kv_attention_launcher<uint16_t, 32>(
+        out,
+        query,
+        key_cache,
+        value_cache,
+        scale,
+        block_tables,
+        context_lens,
+        max_context_len);
    } else {
      assert(false);
    }
@@ -679,6 +689,16 @@ void single_query_cached_kv_attention(
        block_tables,
        context_lens,
        max_context_len);
+    } else if (block_size == 32) {
+      single_query_cached_kv_attention_launcher<float, 32>(
+        out,
+        query,
+        key_cache,
+        value_cache,
+        scale,
+        block_tables,
+        context_lens,
+        max_context_len);
    } else {
      assert(false);
    }
@@ -834,6 +854,18 @@ void multi_query_cached_kv_attention(
        block_tables,
        context_lens,
        max_context_len);
+    } else if (block_size == 32) {
+      multi_query_cached_kv_attention_launcher<uint16_t, 32>(
+        cu_query_lens,
+        seq_prompt_mapping,
+        out,
+        query,
+        key_cache,
+        value_cache,
+        scale,
+        block_tables,
+        context_lens,
+        max_context_len);
    } else {
      assert(false);
    }
@@ -863,6 +895,18 @@ void multi_query_cached_kv_attention(
        block_tables,
        context_lens,
        max_context_len);
+    } else if (block_size == 32) {
+      multi_query_cached_kv_attention_launcher<float, 32>(
+        cu_query_lens,
+        seq_prompt_mapping,
+        out,
+        query,
+        key_cache,
+        value_cache,
+        scale,
+        block_tables,
+        context_lens,
+        max_context_len);
    } else {
      assert(false);
    }