Implement single_query_cached_kv_attention kernel (#3)

This commit is contained in:
Woosuk Kwon
2023-03-01 15:02:19 -08:00
committed by GitHub
parent cbf8779afa
commit 0deacbce6e
12 changed files with 2140 additions and 60 deletions

View File

@@ -118,7 +118,7 @@ class Worker:
_pad_to_max(block_table, max_num_blocks_per_seq)
for block_table in generation_block_tables]
block_tables_tensor = torch.tensor(
-padded_block_tables, dtype=int, device=self.device)
+padded_block_tables, dtype=torch.int, device=self.device)
input_metadata = InputMetadata(
seq_ids=prompt_seq_ids + generation_seq_ids,