debug more4

2026-05-14 22:50:51 +00:00
parent fd5f04eb15
commit e46ff41569
2 changed files with 46 additions and 0 deletions
--- a/vllm/patches/deepseek_v4.py
+++ b/vllm/patches/deepseek_v4.py
@@ -2202,6 +2202,24 @@ class DeepseekV4ForCausalLM(nn.Module):
        if os.environ.get('NVFP4_DEBUG_SYNC', '') == '1':
            torch.cuda.synchronize()
            print("[NVFP4] post-load conversion done, CUDA OK")
+        
+        # Post-load NaN scale scan — report any scale tensors that contain
+        # at least one NaN after weight loading + post-load conversion
+        nan_attrs = []
+        for name, module in self.named_modules():
+            for attr in ('weight_scale', 'weight_scale_inv', 'weight_scale_2',
+                         'input_scale', 'act_scale'):
+                if hasattr(module, attr):
+                    t = getattr(module, attr)
+                    if torch.is_tensor(t) and torch.isnan(t.to(torch.float32)).any().item():
+                        nan_attrs.append((name, attr, tuple(t.shape), str(t.dtype)))
+        if nan_attrs:
+            print(f"[POST-LOAD] {len(nan_attrs)} NaN scale tensors after loading:")
+            for n, a, s, d in nan_attrs[:20]:
+                print(f"  {n}.{a} shape={s} dtype={d}")
+        else:
+            print("[POST-LOAD] No NaN scale tensors found — scales are clean")
+        
        return loaded_params

    def get_expert_mapping(self) -> list[tuple[str, str, int, str]]:
--- a/vllm/patches/deepseek_v4_attention.py
+++ b/vllm/patches/deepseek_v4_attention.py
@@ -377,6 +377,34 @@ class DeepseekV4MultiHeadLatentAttentionWrapper(PluggableLayer):
                      f"inf_any={torch.isinf(zf32).any().item()} "
                      f"shape={tuple(z.shape)} dtype={z.dtype}")

+        # wo_b inspection — print stats for every public tensor attribute, once per instance
+        if _debug and not hasattr(self, '_wo_b_inspected'):
+            self._wo_b_inspected = True
+            layer_idx = getattr(self, 'layer_idx', None) or getattr(self, 'layer_name', '?')
+            print(f"[wo_b-inspect L{layer_idx}] type={type(self.wo_b).__name__}")
+            print(f"[wo_b-inspect L{layer_idx}] z (input) nan_frac="
+                  f"{torch.isnan(z.to(torch.float32)).float().mean().item():.4f} "
+                  f"abs_max={z.to(torch.float32).abs().max().item():.4e}")
+            for attr in dir(self.wo_b):
+                if attr.startswith('_'):
+                    continue
+                try:
+                    v = getattr(self.wo_b, attr)
+                except Exception:
+                    continue
+                if torch.is_tensor(v):
+                    vf = v.to(torch.float32) if v.dtype not in (torch.float32,) else v
+                    nf = torch.isnan(vf).float().mean().item()
+                    inf = torch.isinf(vf).any().item()
+                    try:
+                        vmin = vf.min().item()
+                        vmax = vf.max().item()
+                    except Exception:
+                        vmin = vmax = float('nan')
+                    print(f"[wo_b-inspect L{layer_idx}] {attr}: "
+                          f"dtype={v.dtype} shape={tuple(v.shape)} "
+                          f"nan_frac={nf:.4f} inf={inf} min={vmin:.4e} max={vmax:.4e}")
+
        result = self.wo_b(z.flatten(1))

        # NaN-trace: check final wo_b output