From 2f0d3ba745ad56052d837747cfd1fec5e8d3e31b Mon Sep 17 00:00:00 2001
From: Woosuk Kwon
Date: Sun, 25 Jan 2026 18:35:02 -0800
Subject: [PATCH] [Model Runner V2] Minor simplification for finish_requests
 (#33048)

Signed-off-by: Woosuk Kwon
---
 vllm/v1/worker/gpu/model_runner.py | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/vllm/v1/worker/gpu/model_runner.py b/vllm/v1/worker/gpu/model_runner.py
index 0206fb9b2..94601d4c6 100644
--- a/vllm/v1/worker/gpu/model_runner.py
+++ b/vllm/v1/worker/gpu/model_runner.py
@@ -416,13 +416,12 @@ class GPUModelRunner(LoRAModelRunnerMixin):
         torch.cuda.synchronize()
 
     def finish_requests(self, scheduler_output: SchedulerOutput) -> None:
-        if scheduler_output.preempted_req_ids is not None:
-            for req_id in scheduler_output.preempted_req_ids:
-                self.req_states.remove_request(req_id)
-                if self.supports_mm_inputs:
-                    self.encoder_runner.remove_request(req_id)
-                self.prompt_logprobs_worker.remove_request(req_id)
-        for req_id in scheduler_output.finished_req_ids:
+        finished_req_ids = scheduler_output.finished_req_ids
+        if scheduler_output.preempted_req_ids:
+            finished_req_ids = finished_req_ids.union(
+                scheduler_output.preempted_req_ids
+            )
+        for req_id in finished_req_ids:
             self.req_states.remove_request(req_id)
             if self.supports_mm_inputs:
                 self.encoder_runner.remove_request(req_id)