From 30132cd144af8876e7c0d2aac28cabaea3710254 Mon Sep 17 00:00:00 2001 From: Xiao Li Date: Sat, 21 Feb 2026 21:11:54 -0800 Subject: [PATCH] Fix apply_top_k_top_p_triton called by non-cuda logits Tensor (#35030) Signed-off-by: Xiao Li --- vllm/v1/sample/ops/topk_topp_sampler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm/v1/sample/ops/topk_topp_sampler.py b/vllm/v1/sample/ops/topk_topp_sampler.py index 33f7090e4..dcae8f974 100644 --- a/vllm/v1/sample/ops/topk_topp_sampler.py +++ b/vllm/v1/sample/ops/topk_topp_sampler.py @@ -248,7 +248,7 @@ def apply_top_k_top_p( if p is None and k is None: return logits - if HAS_TRITON and logits.shape[0] >= 8: + if HAS_TRITON and logits.shape[0] >= 8 and logits.is_cuda: return apply_top_k_top_p_triton(logits, k, p) # Use pytorch sort implementation for small batch sizes.