a little more debug1
This commit is contained in:
@@ -2180,6 +2180,20 @@ class DeepseekV4ForCausalLM(nn.Module):
|
||||
if os.environ.get('NVFP4_DEBUG_SYNC', '') == '1':
|
||||
torch.cuda.synchronize()
|
||||
print("[NVFP4] post-load conversion done, CUDA OK")
|
||||
|
||||
# POST-LOAD: scan for all-zero params (missed renames, failed loads)
|
||||
zero_attrs = []
|
||||
for name, p in self.named_parameters():
|
||||
if not torch.is_tensor(p):
|
||||
continue
|
||||
sample = p.flatten()[:1024] if p.numel() > 1024 else p.flatten()
|
||||
if (sample == 0).all().item():
|
||||
if (p == 0).all().item():
|
||||
zero_attrs.append((name, tuple(p.shape), str(p.dtype)))
|
||||
print(f"[POST-LOAD] {len(zero_attrs)} all-zero param tensors:")
|
||||
for n, s, d in zero_attrs[:50]:
|
||||
print(f" {n} shape={s} dtype={d}")
|
||||
|
||||
return loaded_params
|
||||
|
||||
def get_expert_mapping(self) -> list[tuple[str, str, int, str]]:
|
||||
|
||||
Reference in New Issue
Block a user