From bcd7a0cf0d87b9197066f1ac8d6d59152b925441 Mon Sep 17 00:00:00 2001
From: biondizzle <biondizzle@gmail.com>
Date: Mon, 1 Jun 2026 04:08:21 +0000
Subject: [PATCH] diag: check SE weight and scale integrity for first 3 layers

---
 single_shot_inference.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/single_shot_inference.py b/single_shot_inference.py
index 0be94b1d..8233b7fe 100644
--- a/single_shot_inference.py
+++ b/single_shot_inference.py
@@ -424,6 +424,13 @@ def moe_forward(x, li, moe_runner, se_runner, router, token_id):
         has_nan = torch.isnan(shared_out).any().item()
         out_max = shared_out.abs().max().item() if not has_nan else float('nan')
         print(f"  L{li} MoE shared: |out|={out_max:.4f} has_nan={has_nan}", flush=True)
+        # Diagnostic only: print SE l1 weight raw-byte range and l1 scale range / NaN flag
+        if hasattr(se_runner, '_l1_mat_b') and se_runner._l1_mat_b is not None:
+            wb = se_runner._l1_mat_b.view(torch.uint8)
+            print(f"  L{li} SE l1 weight: shape={list(se_runner._l1_mat_b.shape)} dtype={se_runner._l1_mat_b.dtype} uint8_range=[{wb.min().item()},{wb.max().item()}]", flush=True)
+        if hasattr(se_runner, '_l1_scale_b') and se_runner._l1_scale_b is not None:
+            sb = se_runner._l1_scale_b
+            print(f"  L{li} SE l1 scale: shape={list(sb.shape)} dtype={sb.dtype} range=[{sb.min().item():.6f},{sb.max().item():.6f}] has_nan={torch.isnan(sb).any().item()}", flush=True)
         print(f"  L{li} SE gsa: l1={se_runner._l1_activation_global_scale:.6f} l2={se_runner._l2_activation_global_scale:.6f} gsb: l1={se_runner._l1_gsb[0].item():.6f} l2={se_runner._l2_gsb[0].item():.6f}", flush=True)
     return routed_out + shared_out