debug more4: add post-load NaN scale scan and one-time wo_b tensor inspection

This commit is contained in:
2026-05-14 22:50:51 +00:00
parent fd5f04eb15
commit e46ff41569
2 changed files with 46 additions and 0 deletions

View File

@@ -2202,6 +2202,24 @@ class DeepseekV4ForCausalLM(nn.Module):
if os.environ.get('NVFP4_DEBUG_SYNC', '') == '1':
torch.cuda.synchronize()
print("[NVFP4] post-load conversion done, CUDA OK")
# Post-load NaN scale scan: once weight loading and post-load conversion
# are done, walk every submodule and record each scale tensor that
# contains at least one NaN.
_scale_attr_names = ('weight_scale', 'weight_scale_inv', 'weight_scale_2',
                     'input_scale', 'act_scale')
bad_scales = []
for mod_name, submodule in self.named_modules():
    for scale_attr in _scale_attr_names:
        if not hasattr(submodule, scale_attr):
            continue
        tensor = getattr(submodule, scale_attr)
        if not torch.is_tensor(tensor):
            continue
        # Cast to float32 before the NaN test; .item() yields a Python bool.
        has_nan = torch.isnan(tensor.to(torch.float32)).any().item()
        if has_nan:
            bad_scales.append(
                (mod_name, scale_attr, tuple(tensor.shape), str(tensor.dtype)))
if not bad_scales:
    print("[POST-LOAD] No NaN scale tensors found — scales are clean")
else:
    print(f"[POST-LOAD] {len(bad_scales)} NaN scale tensors after loading:")
    # Only the first 20 offenders are listed; the header carries the total.
    for mod_name, scale_attr, shape, dtype in bad_scales[:20]:
        print(f" {mod_name}.{scale_attr} shape={shape} dtype={dtype}")
return loaded_params
def get_expert_mapping(self) -> list[tuple[str, str, int, str]]:

View File

@@ -377,6 +377,34 @@ class DeepseekV4MultiHeadLatentAttentionWrapper(PluggableLayer):
f"inf_any={torch.isinf(zf32).any().item()} "
f"shape={tuple(z.shape)} dtype={z.dtype}")
# wo_b inspection — dump all tensor attributes of self.wo_b once.
# Runs only when _debug is set, and only on the first call for this
# instance (guarded by the _wo_b_inspected marker attribute).
if _debug and not hasattr(self, '_wo_b_inspected'):
    self._wo_b_inspected = True
    # Explicit None test: a layer_idx of 0 is falsy, so `a or b` would
    # wrongly fall back to layer_name / '?' for the first layer.
    layer_idx = getattr(self, 'layer_idx', None)
    if layer_idx is None:
        layer_idx = getattr(self, 'layer_name', '?')
    tag = f"[wo_b-inspect L{layer_idx}]"
    print(f"{tag} type={type(self.wo_b).__name__}")
    # Cast the input once and reuse it for both statistics.
    z_f32 = z.to(torch.float32)
    print(f"{tag} z (input) nan_frac="
          f"{torch.isnan(z_f32).float().mean().item():.4f} "
          f"abs_max={z_f32.abs().max().item():.4e}")
    for attr in dir(self.wo_b):
        # Skip private and dunder names.
        if attr.startswith('_'):
            continue
        try:
            v = getattr(self.wo_b, attr)
        except Exception:
            # Best-effort debug dump: attribute access may raise for
            # arbitrary reasons; skip rather than break the caller.
            continue
        if not torch.is_tensor(v):
            continue
        # Tensor.to returns the tensor itself when the dtype already
        # matches, so no separate float32 special case is needed.
        vf = v.to(torch.float32)
        nf = torch.isnan(vf).float().mean().item()
        inf = torch.isinf(vf).any().item()
        try:
            vmin = vf.min().item()
            vmax = vf.max().item()
        except Exception:
            # The reduction failed; report NaN placeholders instead of
            # aborting the dump.
            vmin = vmax = float('nan')
        print(f"{tag} {attr}: "
              f"dtype={v.dtype} shape={tuple(v.shape)} "
              f"nan_frac={nf:.4f} inf={inf} min={vmin:.4e} max={vmax:.4e}")
result = self.wo_b(z.flatten(1))
# NaN-trace: check final wo_b output