fix: set _l1_activation_global_scale and _l2_activation_global_scale (with leading underscore) — attribute name mismatch
This commit is contained in:
@@ -526,8 +526,8 @@ class DeepseekV4MegaMoEExperts(nn.Module):
|
||||
l2_igs = w2_igs[:, 0]
|
||||
else:
|
||||
l2_igs = w2_igs
|
||||
self._cutedsl_runner.l1_activation_global_scale = l1_igs.mean().item()
|
||||
self._cutedsl_runner.l2_activation_global_scale = l2_igs.mean().item()
|
||||
self._cutedsl_runner._l1_activation_global_scale = l1_igs.mean().item()
|
||||
self._cutedsl_runner._l2_activation_global_scale = l2_igs.mean().item()
|
||||
|
||||
# Drop the original loader-side parameters
|
||||
self._w13_input_scale = self.w13_input_scale.data.clone()
|
||||
|
||||
Reference in New Issue
Block a user