fixey wixey: limit DEBUG weight-load logging to w1/w3 shards with int8/uint8 weights (skip scales)

This commit is contained in:
2026-05-15 06:07:18 +00:00
parent 685bce48b4
commit 311b28bd9f

View File

@@ -372,8 +372,8 @@ class DeepseekV4MegaMoEExperts(nn.Module):
if local_expert_id == -1:
return False
# DEBUG: log weight loads for expert params
if "w13_weight" in weight_name and local_expert_id < 2:
# DEBUG: log weight loads for expert params (weight only, not scales)
if shard_id in ("w1", "w3") and local_expert_id < 2 and loaded_weight.dtype in (torch.uint8, torch.int8):
print(f"[WT-LOAD] {weight_name} expert={expert_id}→local={local_expert_id} "
f"shard={shard_id} loaded_shape={tuple(loaded_weight.shape)} "
f"param_shape={tuple(param.data[local_expert_id].shape)} "