From 1fc375dc053424c3b8656802d35d5251e75fc857 Mon Sep 17 00:00:00 2001
From: rishitdholakia13 <123388671+rishitdholakia13@users.noreply.github.com>
Date: Fri, 15 Aug 2025 17:25:05 -0600
Subject: [PATCH] [Structured Outputs] [Bug] Fix misalignment in
 apply_grammar_bitmask causing unintended masking and NaN logits (#22963)

Signed-off-by: rishitdholakia13 <rishit+github@cohere.com>
---
 vllm/v1/worker/gpu_model_runner.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/vllm/v1/worker/gpu_model_runner.py b/vllm/v1/worker/gpu_model_runner.py
index bef67486d..4c919b392 100644
--- a/vllm/v1/worker/gpu_model_runner.py
+++ b/vllm/v1/worker/gpu_model_runner.py
@@ -1355,10 +1355,10 @@ class GPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin):
             cumulative_index += 1 + num_spec_tokens
         grammar_bitmask = sorted_bitmask
 
-        # If the grammar bitmask and the logits have the same shape
+        # If the number of out indices equals the logits' first dimension,
         # we don't need to pass indices to the kernel,
         # since the bitmask is already aligned with the logits.
-        skip_out_indices = grammar_bitmask.shape[0] == logits.shape[0]
+        skip_out_indices = len(out_indices) == logits.shape[0]
 
         # Serialization of np.ndarray is much more efficient than a tensor,
         # so we receive it in that format.