Implement single_query_cached_kv_attention kernel (#3)

This commit is contained in:
Woosuk Kwon
2023-03-01 15:02:19 -08:00
committed by GitHub
parent cbf8779afa
commit 0deacbce6e
12 changed files with 2140 additions and 60 deletions

View File

@@ -15,7 +15,9 @@ class BlockManager:
block_size: int,
num_blocks: int,
) -> None:
assert block_size in [8, 16, 32]
if block_size not in [8, 16]:
raise ValueError(f'Unsupported block size: {block_size}'
'The block size must be either 8 or 16.')
self.device = device
self.block_size = block_size
self.num_blocks = num_blocks