[Misc][V1] Fix type in v1 prefix caching (#11151)

This commit is contained in:
Cody Yu
2024-12-12 16:57:40 -08:00
committed by GitHub
parent db6c264a1e
commit 78ed8f57d8
3 changed files with 27 additions and 15 deletions

View File

@@ -49,7 +49,7 @@ def test_prefill():
block_hash = hash_block_tokens(parent_block_hash, block_tokens)
assert manager.block_pool[block_id].block_hash == block_hash
assert manager.block_pool[block_id].ref_cnt == 1
-parent_block_hash = block_hash
+parent_block_hash = block_hash.hash_value
# Check partial/preallocated block metadata
for block_id in (3, 4):
@@ -360,11 +360,15 @@ def test_preallocate_blocks(num_preallocate_tokens: int, block_size: int):
assert not computed_blocks
# Just ask for 1 block.
blocks = manager.allocate_slots(req, block_size, computed_blocks)
+req.num_computed_tokens = block_size
assert len(blocks) == 1 + num_preallocated_blocks
-# Append slots to the block.
-req.num_computed_tokens = block_size * len(blocks) # Assume all used.
-blocks = manager.append_slots(req, block_size) # Append 1 block.
+# Assume all computed.
+manager.append_slots(req, block_size * (len(blocks) - 1))
+req.num_computed_tokens = block_size * len(blocks)
+# Append 1 block.
+blocks = manager.append_slots(req, block_size)
assert len(blocks) == 1 + num_preallocated_blocks