diff --git a/vllm/patches/deepseek_v4.py b/vllm/patches/deepseek_v4.py index 4b55e095..e8a16fe2 100644 --- a/vllm/patches/deepseek_v4.py +++ b/vllm/patches/deepseek_v4.py @@ -372,8 +372,8 @@ class DeepseekV4MegaMoEExperts(nn.Module): if local_expert_id == -1: return False - # DEBUG: log weight loads for expert params - if "w13_weight" in weight_name and local_expert_id < 2: + # DEBUG: log weight loads for expert params (weight only, not scales) + if shard_id in ("w1", "w3") and local_expert_id < 2 and loaded_weight.dtype in (torch.uint8, torch.int8): print(f"[WT-LOAD] {weight_name} expert={expert_id}→local={local_expert_id} " f"shard={shard_id} loaded_shape={tuple(loaded_weight.shape)} " f"param_shape={tuple(param.data[local_expert_id].shape)} "