From 56a62c310cc4840671949488c60c40df5e0e2f1f Mon Sep 17 00:00:00 2001 From: Matthias Gehre Date: Fri, 20 Mar 2026 16:31:57 +0100 Subject: [PATCH] [Bugfix] Reject channelwise quantization (group_size <= 0) in ExllamaLinearKernel (#37331) Signed-off-by: Matthias Gehre --- .../kernels/linear/mixed_precision/exllama.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/vllm/model_executor/kernels/linear/mixed_precision/exllama.py b/vllm/model_executor/kernels/linear/mixed_precision/exllama.py index 537a8e278..3ad43a225 100644 --- a/vllm/model_executor/kernels/linear/mixed_precision/exllama.py +++ b/vllm/model_executor/kernels/linear/mixed_precision/exllama.py @@ -59,6 +59,13 @@ class ExllamaLinearKernel(MPLinearKernel): f"{cls.SUPPORTED_QUANT_TYPES}", ) + if c.group_size <= 0: + return ( + False, + f"Group size ({c.group_size}) must be positive; " + "Exllama does not support channelwise quantization", + ) + if c.full_weight_shape[0] % c.group_size != 0: return ( False,