From 1fc375dc053424c3b8656802d35d5251e75fc857 Mon Sep 17 00:00:00 2001 From: rishitdholakia13 <123388671+rishitdholakia13@users.noreply.github.com> Date: Fri, 15 Aug 2025 17:25:05 -0600 Subject: [PATCH] [Structured Outputs] [Bug] Fix misalignment in apply_grammar_bitmask causing unintended masking and NaN logits (#22963) Signed-off-by: rishitdholakia13 --- vllm/v1/worker/gpu_model_runner.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vllm/v1/worker/gpu_model_runner.py b/vllm/v1/worker/gpu_model_runner.py index bef67486d..4c919b392 100644 --- a/vllm/v1/worker/gpu_model_runner.py +++ b/vllm/v1/worker/gpu_model_runner.py @@ -1355,10 +1355,10 @@ class GPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin): cumulative_index += 1 + num_spec_tokens grammar_bitmask = sorted_bitmask - # If the grammar bitmask and the logits have the same shape + # If the length of out indices and the logits have the same shape # we don't need to pass indices to the kernel, # since the bitmask is already aligned with the logits. - skip_out_indices = grammar_bitmask.shape[0] == logits.shape[0] + skip_out_indices = len(out_indices) == logits.shape[0] # Serialization of np.ndarray is much more efficient than a tensor, # so we receive it in that format.