[BugFix] Fix stuck stats/metrics after requests are aborted (#22995)

Signed-off-by: Nick Hill <nhill@redhat.com>
This commit is contained in:
Nick Hill
2025-08-19 22:50:29 -07:00
committed by GitHub
parent de7b67a023
commit 8fd920924c
3 changed files with 106 additions and 5 deletions

View File

@@ -298,7 +298,12 @@ class BlockPool:
Returns:
The KV cache usage (between 0.0 and 1.0).
"""
return 1.0 - (self.get_num_free_blocks() / self.num_gpu_blocks)
# Subtract 1 to account for null block.
total_gpu_blocks = self.num_gpu_blocks - 1
if not total_gpu_blocks:
return 0
return 1.0 - (self.get_num_free_blocks() / total_gpu_blocks)
def take_events(self) -> list[KVCacheEvent]:
"""Atomically takes all events and clears the queue.

View File

@@ -902,10 +902,13 @@ class Scheduler(SchedulerInterface):
finished_requests=finished_set)
finished_req_ids.clear()
if engine_core_outputs:
if (stats := self.make_stats(spec_decoding_stats)) is not None:
# Return stats to only one of the front-ends.
next(iter(engine_core_outputs.values())).scheduler_stats = (
self.make_stats(spec_decoding_stats))
if (eco := next(iter(engine_core_outputs.values()), None)) is None:
# We must return the stats even if there are no request
# outputs this step.
engine_core_outputs[0] = eco = EngineCoreOutputs()
eco.scheduler_stats = stats
return engine_core_outputs