diff --git a/vllm/nvfp4_cutedsl.py b/vllm/nvfp4_cutedsl.py index c9542e3a..31b72dc8 100644 --- a/vllm/nvfp4_cutedsl.py +++ b/vllm/nvfp4_cutedsl.py @@ -60,8 +60,10 @@ class CuTeDSLMoERunner: self._l1_gsb = None self._l2_gsb = None - self._l1_activation_global_scale = None # set from checkpoint input_scale - self._l2_activation_global_scale = None + # Default: 1 / (6 * 448) = 1/2688 ≈ 0.000372 (amax=1 → gs=1/2688) + # Overridden in finalize_weights with the checkpoint input_scale or a warmup value + self._l1_activation_global_scale = 1.0 / (6.0 * 448.0) + self._l2_activation_global_scale = 1.0 / (6.0 * 448.0) # Pre-allocated cudagraph buffers (set in _allocate_buffers) self._token_indices = None