[Core] Reuse empty block lists whenever possible in KVCacheBlocks to mitigate GC costs (#24964)

Signed-off-by: Jialin Ouyang <Jialin.Ouyang@gmail.com>
This commit is contained in:
Jialin Ouyang
2025-10-14 12:58:43 -07:00
committed by GitHub
parent 82af928c41
commit acaa2c0a4a
5 changed files with 53 additions and 26 deletions

View File

@@ -421,9 +421,7 @@ class Scheduler(SchedulerInterface):
# KVTransfer: WAITING reqs have num_computed_tokens > 0
# after async KV recvs are completed.
else:
-new_computed_blocks = (
-self.kv_cache_manager.create_empty_block_list()
-)
+new_computed_blocks = self.kv_cache_manager.empty_kv_cache_blocks
num_new_local_computed_tokens = 0
num_computed_tokens = request.num_computed_tokens