[BugFix] Fix stuck stats/metrics after requests are aborted (#22995)
Signed-off-by: Nick Hill <nhill@redhat.com>
This commit is contained in:
@@ -298,7 +298,12 @@ class BlockPool:
         Returns:
             The KV cache usage (between 0.0 and 1.0).
         """
-        return 1.0 - (self.get_num_free_blocks() / self.num_gpu_blocks)
+
+        # Subtract 1 to account for null block.
+        total_gpu_blocks = self.num_gpu_blocks - 1
+        if not total_gpu_blocks:
+            return 0
+        return 1.0 - (self.get_num_free_blocks() / total_gpu_blocks)

     def take_events(self) -> list[KVCacheEvent]:
         """Atomically takes all events and clears the queue.

@@ -902,10 +902,13 @@ class Scheduler(SchedulerInterface):
                         finished_requests=finished_set)
                 finished_req_ids.clear()

-        if engine_core_outputs:
+        if (stats := self.make_stats(spec_decoding_stats)) is not None:
             # Return stats to only one of the front-ends.
-            next(iter(engine_core_outputs.values())).scheduler_stats = (
-                self.make_stats(spec_decoding_stats))
+            if (eco := next(iter(engine_core_outputs.values()), None)) is None:
+                # We must return the stats even if there are no request
+                # outputs this step.
+                engine_core_outputs[0] = eco = EngineCoreOutputs()
+            eco.scheduler_stats = stats

         return engine_core_outputs

Reference in New Issue
Block a user