diff --git a/vllm/model_executor/layers/quantization/utils/marlin_utils_fp4.py b/vllm/model_executor/layers/quantization/utils/marlin_utils_fp4.py index e4a2ab413..d6b32c4bb 100644 --- a/vllm/model_executor/layers/quantization/utils/marlin_utils_fp4.py +++ b/vllm/model_executor/layers/quantization/utils/marlin_utils_fp4.py @@ -37,9 +37,6 @@ def _nvfp4_compute_scale_factor(marlin_scales: torch.Tensor) -> float: min_val = ws_float[nonzero_mask].min() if min_val < 2: sf = (2 / min_val).log2().ceil().exp2() - assert (ws_float[nonzero_mask] * sf <= 448 * (2**7)).all(), ( - "NVFP4 scale dynamic range too large for rescaling" - ) return sf.item() return 1.0