[Bugfix] Move the _touch(computed_blocks) call in the allocate_slots method to after the check for allocating new blocks. (#11565)

This commit is contained in:
sakunkun
2024-12-31 14:29:04 +08:00
committed by GitHub
parent 82c49d3260
commit 2c5718809b
2 changed files with 74 additions and 8 deletions

View File

@@ -191,7 +191,7 @@ class KVCacheManager:
request: The request to allocate slots.
num_tokens: The number of tokens to allocate. Note that this does
not include the tokens that have already been computed.
computed_blocks: The blocks that have already been computed.
computed_blocks: A list of computed blocks.
Returns:
A list of newly allocated blocks, or None if there are not enough free blocks.
@@ -200,6 +200,18 @@ class KVCacheManager:
raise ValueError(
f"num_tokens must be greater than 0, got {num_tokens}")
# If a computed block of a request is an eviction candidate (in the
# free queue and ref_cnt == 0), it cannot be counted as a free block
# when allocating this request.
num_evictable_computed_blocks = sum(1 for blk in computed_blocks
if blk.ref_cnt == 0)
num_required_blocks = cdiv(num_tokens, self.block_size)
if (num_required_blocks > self.free_block_queue.num_free_blocks -
num_evictable_computed_blocks):
# Cannot allocate new blocks.
return None
# Touch the computed blocks to make sure they won't be evicted.
if self.enable_caching:
self._touch(computed_blocks)
@@ -208,11 +220,6 @@ class KVCacheManager:
"Computed blocks should be empty when "
"prefix caching is disabled")
num_required_blocks = cdiv(num_tokens, self.block_size)
if (num_required_blocks > self.free_block_queue.num_free_blocks):
# Cannot allocate new blocks.
return None
# Determine the number of new blocks to allocate considering
# preallocated blocks.
num_new_blocks = min(