[Bugfix] get_num_blocks_to_allocate with null_block (#19031)

Signed-off-by: Chen Zhang <zhangch99@outlook.com>
This commit is contained in:
Chen Zhang
2025-06-04 06:30:55 +08:00
committed by GitHub
parent 135cf55cd1
commit b5fd9506c1
4 changed files with 32 additions and 4 deletions

View File

@@ -63,6 +63,7 @@ class BlockPool:
# The ref_cnt of null_block is not maintained, so special care is needed to
# avoid freeing it.
self.null_block = self.free_block_queue.popleft()
self.null_block.is_null = True
self.enable_kv_cache_events = enable_kv_cache_events
self.kv_event_queue: list[KVCacheEvent] = []
@@ -252,7 +253,7 @@ class BlockPool:
for block in blocks:
# ref_cnt=0 means this block is in the free list (i.e. eviction
# candidate), so remove it.
if block.ref_cnt == 0 and block != self.null_block:
if block.ref_cnt == 0 and not block.is_null:
self.free_block_queue.remove(block)
block.incr_ref()
@@ -267,7 +268,7 @@ class BlockPool:
for block in ordered_blocks:
block.decr_ref()
# null_block should not be added to the free list.
if block.ref_cnt == 0 and block != self.null_block:
if block.ref_cnt == 0 and not block.is_null:
self.free_block_queue.append(block)
def reset_prefix_cache(self) -> bool:

View File

@@ -125,6 +125,9 @@ class KVCacheBlock:
prev_free_block: Optional["KVCacheBlock"] = None
next_free_block: Optional["KVCacheBlock"] = None
# Whether the block is a null block that should never be cached.
is_null: bool = False
def incr_ref(self):
self.ref_cnt += 1

View File

@@ -83,8 +83,9 @@ class SingleTypeKVCacheManager(ABC):
# free queue and ref_cnt == 0), it will be changed from a free block
# to a computed block when the request is allocated, so we also count
# it as needed to be allocated.
num_evictable_computed_blocks = sum(blk.ref_cnt == 0
for blk in new_computed_blocks)
num_evictable_computed_blocks = sum(
blk.ref_cnt == 0 and not blk.is_null
for blk in new_computed_blocks)
return ((num_new_blocks + num_evictable_computed_blocks) *
self.num_kv_cache_groups)