[V1] Eagerly remove finished requests from the batch (#14388)

Signed-off-by: Nick Hill <nhill@redhat.com>
This commit is contained in:
Nick Hill
2025-03-07 10:56:00 -08:00
committed by GitHub
parent c6359e8ca6
commit 8ed5421aaa
9 changed files with 58 additions and 16 deletions

View File

@@ -253,13 +253,14 @@ class AsyncLLM(EngineClient):
while True:
# 1) Pull EngineCoreOutputs from the EngineCore.
outputs = await self.engine_core.get_output_async()
num_outputs = len(outputs.outputs)
iteration_stats = IterationStats() if self.log_stats else None
iteration_stats = IterationStats() if (
self.log_stats and num_outputs) else None
# Split outputs into chunks of at most
# VLLM_V1_OUTPUT_PROC_CHUNK_SIZE, so that we don't block the
# event loop for too long.
num_outputs = len(outputs.outputs)
if num_outputs <= VLLM_V1_OUTPUT_PROC_CHUNK_SIZE:
slices = (outputs.outputs, )
else:
@@ -313,7 +314,6 @@ class AsyncLLM(EngineClient):
return
assert scheduler_stats is not None
assert iteration_stats is not None
for stat_logger in self.stat_loggers:
stat_logger.record(scheduler_stats=scheduler_stats,
iteration_stats=iteration_stats)

View File

@@ -153,7 +153,9 @@ class EngineCore:
def step(self) -> EngineCoreOutputs:
"""Schedule, execute, and make output."""
if not self.scheduler.has_unfinished_requests():
# Check for any requests remaining in the scheduler - unfinished,
# or finished and not yet removed from the batch.
if not self.scheduler.has_requests():
return EngineCoreOutputs(
outputs=[],
scheduler_stats=self.scheduler.make_stats(),
@@ -335,7 +337,7 @@ class EngineCoreProc(EngineCore):
# Loop until process is sent a SIGINT or SIGTERM
while True:
# 1) Poll the input queue until there is work to do.
while not self.scheduler.has_unfinished_requests():
while not self.scheduler.has_requests():
logger.debug("EngineCore busy loop waiting.")
req = self.input_queue.get()
self._handle_client_request(*req)