From a8e48a7b85f035df401cab03dfb11b16f98aa413 Mon Sep 17 00:00:00 2001 From: Andreas Karatzas Date: Thu, 26 Mar 2026 11:46:03 -0500 Subject: [PATCH] [CI] Fix conch kernel crash on 3D input by reshaping to 2D before GEMM (#38178) Signed-off-by: Andreas Karatzas --- .../model_executor/kernels/linear/mixed_precision/conch.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/vllm/model_executor/kernels/linear/mixed_precision/conch.py b/vllm/model_executor/kernels/linear/mixed_precision/conch.py index cd371581b..34dad0194 100644 --- a/vllm/model_executor/kernels/linear/mixed_precision/conch.py +++ b/vllm/model_executor/kernels/linear/mixed_precision/conch.py @@ -134,8 +134,11 @@ class ConchLinearKernel(MPLinearKernel): if group_size == -1: group_size = x.shape[-1] + x_2d = x.reshape(-1, x.shape[-1]) + out_shape = x.shape[:-1] + (self.config.partition_weight_shape[1],) + output = mixed_precision_gemm( - x=x, + x=x_2d, w_q_packed=w_q.data, w_s=w_s.data, w_zp=w_zp.data if w_zp is not None else None, @@ -147,4 +150,4 @@ class ConchLinearKernel(MPLinearKernel): if bias is not None: output.add_(bias) # In-place add - return output + return output.reshape(out_shape)