Implement single_query_cached_kv_attention kernel (#3)

2023-03-01 15:02:19 -08:00
parent cbf8779afa
commit 0deacbce6e
12 changed files with 2140 additions and 60 deletions
--- a/cacheflow/worker/worker.py
+++ b/cacheflow/worker/worker.py
@@ -118,7 +118,7 @@ class Worker:
            _pad_to_max(block_table, max_num_blocks_per_seq)
            for block_table in generation_block_tables]
        block_tables_tensor = torch.tensor(
-            padded_block_tables, dtype=int, device=self.device)
+            padded_block_tables, dtype=torch.int, device=self.device)

        input_metadata = InputMetadata(
            seq_ids=prompt_seq_ids + generation_seq_ids,