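fixey wixey: restrict the expert weight-load debug logging to w1/w3 shards with uint8/int8 weights (weights only, not scales)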
This commit is contained in:
@@ -372,8 +372,8 @@ class DeepseekV4MegaMoEExperts(nn.Module):
|
||||
if local_expert_id == -1:
|
||||
return False
|
||||
|
||||
# DEBUG: log weight loads for expert params
|
||||
if "w13_weight" in weight_name and local_expert_id < 2:
|
||||
# DEBUG: log weight loads for expert params (weight only, not scales)
|
||||
if shard_id in ("w1", "w3") and local_expert_id < 2 and loaded_weight.dtype in (torch.uint8, torch.int8):
|
||||
print(f"[WT-LOAD] {weight_name} expert={expert_id}→local={local_expert_id} "
|
||||
f"shard={shard_id} loaded_shape={tuple(loaded_weight.shape)} "
|
||||
f"param_shape={tuple(param.data[local_expert_id].shape)} "
|
||||
|
||||
Reference in New Issue
Block a user