[Misc][V1] Fix type in v1 prefix caching (#11151)
This commit is contained in:
@@ -49,7 +49,7 @@ def test_prefill():
|
||||
block_hash = hash_block_tokens(parent_block_hash, block_tokens)
|
||||
assert manager.block_pool[block_id].block_hash == block_hash
|
||||
assert manager.block_pool[block_id].ref_cnt == 1
|
||||
- parent_block_hash = block_hash
|
||||
+ parent_block_hash = block_hash.hash_value
|
||||
|
||||
# Check partial/preallocated block metadata
|
||||
for block_id in (3, 4):
|
||||
@@ -360,11 +360,15 @@ def test_preallocate_blocks(num_preallocate_tokens: int, block_size: int):
|
||||
assert not computed_blocks
|
||||
# Just ask for 1 block.
|
||||
blocks = manager.allocate_slots(req, block_size, computed_blocks)
|
||||
req.num_computed_tokens = block_size
|
||||
assert len(blocks) == 1 + num_preallocated_blocks
|
||||
|
||||
# Append slots to the block.
|
||||
- req.num_computed_tokens = block_size * len(blocks) # Assume all used.
|
||||
- blocks = manager.append_slots(req, block_size) # Append 1 block.
|
||||
+ # Assume all computed.
|
||||
+ manager.append_slots(req, block_size * (len(blocks) - 1))
|
||||
+ req.num_computed_tokens = block_size * len(blocks)
|
||||
|
||||
+ # Append 1 block.
|
||||
+ blocks = manager.append_slots(req, block_size)
|
||||
assert len(blocks) == 1 + num_preallocated_blocks
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user