a little more debug1

2026-05-15 00:02:00 +00:00
parent 756ea2192f
commit 8dbd616add
1 changed files with 14 additions and 0 deletions
--- a/vllm/patches/deepseek_v4.py
+++ b/vllm/patches/deepseek_v4.py
@@ -2180,6 +2180,20 @@ class DeepseekV4ForCausalLM(nn.Module):
        if os.environ.get('NVFP4_DEBUG_SYNC', '') == '1':
            torch.cuda.synchronize()
            print("[NVFP4] post-load conversion done, CUDA OK")
+
+        # POST-LOAD: scan for all-zero params (missed renames, failed loads)
+        zero_attrs = []
+        for name, p in self.named_parameters():
+            if not torch.is_tensor(p):
+                continue
+            sample = p.flatten()[:1024] if p.numel() > 1024 else p.flatten()
+            if (sample == 0).all().item():
+                if (p == 0).all().item():
+                    zero_attrs.append((name, tuple(p.shape), str(p.dtype)))
+        print(f"[POST-LOAD] {len(zero_attrs)} all-zero param tensors:")
+        for n, s, d in zero_attrs[:50]:
+            print(f"  {n} shape={s} dtype={d}")
+
        return loaded_params

    def get_expert_mapping(self) -> list[tuple[str, str, int, str]]: