From edee96519a1b3485bacea2d14b0ffb6c83cae871 Mon Sep 17 00:00:00 2001 From: zzaebok <44357534+zzaebok@users.noreply.github.com> Date: Fri, 10 Apr 2026 04:39:39 +0800 Subject: [PATCH] [Spec Decode] fix returning size mismatch on extract hidden states proposer (#38610) Signed-off-by: Jaebok Lee Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com> --- vllm/v1/spec_decode/extract_hidden_states.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/vllm/v1/spec_decode/extract_hidden_states.py b/vllm/v1/spec_decode/extract_hidden_states.py index eb559845f..380836bb3 100644 --- a/vllm/v1/spec_decode/extract_hidden_states.py +++ b/vllm/v1/spec_decode/extract_hidden_states.py @@ -145,7 +145,10 @@ class ExtractHiddenStatesProposer: # Return the sampled tokens as "draft" tokens # Shape: [batch_size, 1] to match num_speculative_tokens=1 - return sampled_token_ids + # On decode steps with spec tokens, sampled_token_ids may have + # shape [batch_size, 2] (target + spec verification); slice to + # return only the target-sampled column. + return sampled_token_ids[:, :1] def _get_slot_mapping( self,