From c878b43b640bbd5a43e78593722b2fec361eaa05 Mon Sep 17 00:00:00 2001 From: Woosuk Kwon Date: Wed, 18 Feb 2026 15:52:50 -0800 Subject: [PATCH] [Model Runner V2] Remove unnecessary copies in PW CUDA graph capture (#34849) Signed-off-by: Woosuk Kwon --- vllm/v1/worker/gpu/cudagraph_utils.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/vllm/v1/worker/gpu/cudagraph_utils.py b/vllm/v1/worker/gpu/cudagraph_utils.py index 66da081b4..e3839894a 100644 --- a/vllm/v1/worker/gpu/cudagraph_utils.py +++ b/vllm/v1/worker/gpu/cudagraph_utils.py @@ -218,13 +218,11 @@ class CudaGraphManager: batch_descriptor=batch_descriptor, slot_mapping=slot_mappings, ): - hidden_states = model( + model( input_ids=input_ids, positions=positions, inputs_embeds=inputs_embeds, ) - assert self.hidden_states is not None - self.hidden_states[:num_tokens] = hidden_states @torch.inference_mode() def capture(