From 2c9b4cf5bf844de0471f77e6579e16c7bc3ee0d0 Mon Sep 17 00:00:00 2001
From: Lucas Wilkinson
Date: Wed, 14 Jan 2026 23:32:22 -0700
Subject: [PATCH] [BugFix] Fix DeepSeek-V3.1 + DeepGEMM incompatible scale shapes (#32361)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Lucas Wilkinson
Co-authored-by: Eldar Kurtić <8884008+eldarkurtic@users.noreply.github.com>
---
 vllm/model_executor/layers/quantization/utils/quant_utils.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/vllm/model_executor/layers/quantization/utils/quant_utils.py b/vllm/model_executor/layers/quantization/utils/quant_utils.py
index c625768e9..c27ce3494 100644
--- a/vllm/model_executor/layers/quantization/utils/quant_utils.py
+++ b/vllm/model_executor/layers/quantization/utils/quant_utils.py
@@ -300,6 +300,9 @@ def get_and_maybe_dequant_weights(
     if (
         isinstance(layer.quant_method, Fp8LinearMethod)
         and not layer.quant_method.use_marlin
+        # DeepGEMM transforms the scales using `transform_sf_into_required_layout` into
+        # a layout that is not compatible with `scaled_dequantize`.
+        and not layer.quant_method.use_deep_gemm
     ):
         weight_scales = get_attribute_fallback(
             layer, ["weight_scale", "weight_scale_inv"]