a little more debug1

This commit is contained in:
2026-05-15 00:02:00 +00:00
parent 756ea2192f
commit 8dbd616add

View File

@@ -2180,6 +2180,20 @@ class DeepseekV4ForCausalLM(nn.Module):
if os.environ.get('NVFP4_DEBUG_SYNC', '') == '1':
torch.cuda.synchronize()
print("[NVFP4] post-load conversion done, CUDA OK")
# POST-LOAD: scan for all-zero params (missed renames, failed loads)
zero_attrs = []
for name, p in self.named_parameters():
if not torch.is_tensor(p):
continue
sample = p.flatten()[:1024] if p.numel() > 1024 else p.flatten()
if (sample == 0).all().item():
if (p == 0).all().item():
zero_attrs.append((name, tuple(p.shape), str(p.dtype)))
print(f"[POST-LOAD] {len(zero_attrs)} all-zero param tensors:")
for n, s, d in zero_attrs[:50]:
print(f" {n} shape={s} dtype={d}")
return loaded_params
def get_expert_mapping(self) -> list[tuple[str, str, int, str]]: