diff --git a/vllm/patches/deepseek_v4.py b/vllm/patches/deepseek_v4.py index abf2a2ad..61625034 100644 --- a/vllm/patches/deepseek_v4.py +++ b/vllm/patches/deepseek_v4.py @@ -526,8 +526,8 @@ class DeepseekV4MegaMoEExperts(nn.Module): l2_igs = w2_igs[:, 0] else: l2_igs = w2_igs - self._cutedsl_runner.l1_activation_global_scale = l1_igs.mean().item() - self._cutedsl_runner.l2_activation_global_scale = l2_igs.mean().item() + self._cutedsl_runner._l1_activation_global_scale = l1_igs.mean().item() + self._cutedsl_runner._l2_activation_global_scale = l2_igs.mean().item() # Drop the original loader-side parameters self._w13_input_scale = self.w13_input_scale.data.clone()