From 8ccbcda5c0d460b0189f274bfbfe4947b45bd5cb Mon Sep 17 00:00:00 2001 From: Woosuk Kwon Date: Wed, 11 Mar 2026 08:02:44 -0700 Subject: [PATCH] [Model Runner V2] Remove unused warmup_for_prefill method (#36762) Signed-off-by: Woosuk Kwon --- vllm/v1/worker/gpu/model_runner.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/vllm/v1/worker/gpu/model_runner.py b/vllm/v1/worker/gpu/model_runner.py index 58ff78b12..c4fe833ff 100644 --- a/vllm/v1/worker/gpu/model_runner.py +++ b/vllm/v1/worker/gpu/model_runner.py @@ -532,13 +532,6 @@ class GPUModelRunner(LoRAModelRunnerMixin): ) return cuda_graph_size - def warmup_for_prefill(self) -> None: - # For FlashInfer, we would like to execute a dummy prefill run - # to trigger JIT compilation. - if all("FLASHINFER" in b.get_name() for b in self.attn_backends.values()): - self._dummy_run(self.max_num_tokens, skip_attn=False) - torch.accelerator.synchronize() - def finish_requests(self, scheduler_output: SchedulerOutput) -> None: finished_req_ids = scheduler_output.finished_req_ids preempted_req_ids = scheduler_output.preempted_req_ids