From 4af9ed21cba9e4bb85cd7cc124aa6f23cd0ae9a5 Mon Sep 17 00:00:00 2001 From: "zhao, zhenhui" Date: Tue, 17 Mar 2026 19:14:07 +0800 Subject: [PATCH] =?UTF-8?q?[Bugfix](xpu):=20prevent=20=E2=80=9Cselected=20?= =?UTF-8?q?index=20k=20out=20of=20range=E2=80=9D=20in=20TP=20decode=20path?= =?UTF-8?q?=20(#37259)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: zhenzhao --- vllm/_xpu_ops.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/vllm/_xpu_ops.py b/vllm/_xpu_ops.py index 91f5e0290..a2eb5ff3a 100644 --- a/vllm/_xpu_ops.py +++ b/vllm/_xpu_ops.py @@ -426,7 +426,8 @@ class xpu_ops: mask = positions <= index_end_pos # mask: [B * N, L] logits = logits.masked_fill(~mask, float("-inf")) - topk_indices = logits.topk(topk_tokens, dim=-1)[1].to(torch.int32) # [B * N, K] + real_topk = min(topk_tokens, logits.shape[-1]) + topk_indices = logits.topk(real_topk, dim=-1)[1].to(torch.int32) # [B * N, K] # ensure we don't set indices for the top k # that is out of range(masked already) # this will happen if context length is shorter than K