Implement single_query_cached_kv_attention kernel (#3)
This commit is contained in:
@@ -15,7 +15,9 @@ class BlockManager:
|
||||
block_size: int,
|
||||
num_blocks: int,
|
||||
) -> None:
|
||||
assert block_size in [8, 16, 32]
|
||||
if block_size not in [8, 16]:
|
||||
raise ValueError(f'Unsupported block size: {block_size}'
|
||||
'The block size must be either 8 or 16.')
|
||||
self.device = device
|
||||
self.block_size = block_size
|
||||
self.num_blocks = num_blocks
|
||||
|
||||
Reference in New Issue
Block a user