From 8dbd616add2fa6ed5944c259e4b56098a94f2ec2 Mon Sep 17 00:00:00 2001 From: biondizzle Date: Fri, 15 May 2026 00:02:00 +0000 Subject: [PATCH] a little more debug1 --- vllm/patches/deepseek_v4.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/vllm/patches/deepseek_v4.py b/vllm/patches/deepseek_v4.py index ffc64a66..95f2c365 100644 --- a/vllm/patches/deepseek_v4.py +++ b/vllm/patches/deepseek_v4.py @@ -2180,6 +2180,20 @@ class DeepseekV4ForCausalLM(nn.Module): if os.environ.get('NVFP4_DEBUG_SYNC', '') == '1': torch.cuda.synchronize() print("[NVFP4] post-load conversion done, CUDA OK") + + # POST-LOAD: scan for all-zero params (missed renames, failed loads) + zero_attrs = [] + for name, p in self.named_parameters(): + if not torch.is_tensor(p): + continue + sample = p.flatten()[:1024] if p.numel() > 1024 else p.flatten() + if (sample == 0).all().item(): + if (p == 0).all().item(): + zero_attrs.append((name, tuple(p.shape), str(p.dtype))) + print(f"[POST-LOAD] {len(zero_attrs)} all-zero param tensors:") + for n, s, d in zero_attrs[:50]: + print(f" {n} shape={s} dtype={d}") + return loaded_params def get_expert_mapping(self) -> list[tuple[str, str, int, str]]: