From 2c9b4cf5bf844de0471f77e6579e16c7bc3ee0d0 Mon Sep 17 00:00:00 2001
From: Lucas Wilkinson <LucasWilkinson@users.noreply.github.com>
Date: Wed, 14 Jan 2026 23:32:22 -0700
Subject: [PATCH] [BugFix] Fix DeepSeek-V3.1 + DeepGEMM incompatible scale
 shapes (#32361)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Lucas Wilkinson <lwilkins@redhat.com>
Co-authored-by: Eldar Kurtić <8884008+eldarkurtic@users.noreply.github.com>
---
 vllm/model_executor/layers/quantization/utils/quant_utils.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/vllm/model_executor/layers/quantization/utils/quant_utils.py b/vllm/model_executor/layers/quantization/utils/quant_utils.py
index c625768e9..c27ce3494 100644
--- a/vllm/model_executor/layers/quantization/utils/quant_utils.py
+++ b/vllm/model_executor/layers/quantization/utils/quant_utils.py
@@ -300,6 +300,9 @@ def get_and_maybe_dequant_weights(
     if (
         isinstance(layer.quant_method, Fp8LinearMethod)
         and not layer.quant_method.use_marlin
+        # DeepGEMM transforms the scales using `transform_sf_into_required_layout` into
+        # a layout that is not compatible with `scaled_dequantize`.
+        and not layer.quant_method.use_deep_gemm
     ):
         weight_scales = get_attribute_fallback(
             layer, ["weight_scale", "weight_scale_inv"]