From 9482b0b085e044fe9db8926d0ba262fd70b56ca1 Mon Sep 17 00:00:00 2001 From: Michael Goin Date: Wed, 18 Mar 2026 23:37:49 +0100 Subject: [PATCH] [Bugfix] Remove assertion for NVFP4 scale dynamic range (#37465) Signed-off-by: Michael Goin --- .../layers/quantization/utils/marlin_utils_fp4.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/vllm/model_executor/layers/quantization/utils/marlin_utils_fp4.py b/vllm/model_executor/layers/quantization/utils/marlin_utils_fp4.py index e4a2ab413..d6b32c4bb 100644 --- a/vllm/model_executor/layers/quantization/utils/marlin_utils_fp4.py +++ b/vllm/model_executor/layers/quantization/utils/marlin_utils_fp4.py @@ -37,9 +37,6 @@ def _nvfp4_compute_scale_factor(marlin_scales: torch.Tensor) -> float: min_val = ws_float[nonzero_mask].min() if min_val < 2: sf = (2 / min_val).log2().ceil().exp2() - assert (ws_float[nonzero_mask] * sf <= 448 * (2**7)).all(), ( - "NVFP4 scale dynamic range too large for rescaling" - ) return sf.item() return 1.0