From d74278fb676cbafc835fab9e970f6bcc9fd5413d Mon Sep 17 00:00:00 2001 From: Woosuk Kwon Date: Mon, 16 Feb 2026 19:00:29 -0800 Subject: [PATCH] [Model Runner V2] Fix unintended CPU-GPU sync in make_dummy (#34667) Signed-off-by: Woosuk Kwon --- vllm/v1/worker/gpu/input_batch.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm/v1/worker/gpu/input_batch.py b/vllm/v1/worker/gpu/input_batch.py index 2fddbd01d..bdb67be11 100644 --- a/vllm/v1/worker/gpu/input_batch.py +++ b/vllm/v1/worker/gpu/input_batch.py @@ -108,7 +108,7 @@ class InputBatch: query_start_loc_np = np.empty(num_reqs + 1, dtype=np.int32) query_start_loc_np[0] = 0 np.cumsum(num_scheduled_tokens, out=query_start_loc_np[1:]) - input_buffers.query_start_loc[0] = 0 + input_buffers.query_start_loc[:1] = 0 torch.cumsum( seq_lens, dim=0, out=input_buffers.query_start_loc[1 : num_reqs + 1] )