fix: set _l1_activation_global_scale and _l2_activation_global_scale (with leading underscore) — attribute name mismatch

This commit is contained in:
2026-05-17 03:35:20 +00:00
parent b382a7a528
commit d2965b432d

View File

@@ -526,8 +526,8 @@ class DeepseekV4MegaMoEExperts(nn.Module):
l2_igs = w2_igs[:, 0]
else:
l2_igs = w2_igs
-self._cutedsl_runner.l1_activation_global_scale = l1_igs.mean().item()
-self._cutedsl_runner.l2_activation_global_scale = l2_igs.mean().item()
+self._cutedsl_runner._l1_activation_global_scale = l1_igs.mean().item()
+self._cutedsl_runner._l2_activation_global_scale = l2_igs.mean().item()
# Drop the original loader-side parameters
self._w13_input_scale = self.w13_input_scale.data.clone()