From b85fcf4d6f3787cd7e653f545cdda20078819b5f Mon Sep 17 00:00:00 2001 From: biondizzle Date: Mon, 1 Jun 2026 02:49:55 +0000 Subject: [PATCH] diag: print SE global scales for first 3 layers --- single_shot_inference.py | 1 + 1 file changed, 1 insertion(+) diff --git a/single_shot_inference.py b/single_shot_inference.py index 9c011392..2d68dbf3 100644 --- a/single_shot_inference.py +++ b/single_shot_inference.py @@ -422,6 +422,7 @@ def moe_forward(x, li, moe_runner, se_runner, router, token_id): shared_out = se_runner.run(x) if li < 3: print(f" L{li} MoE shared: |out|={shared_out.abs().max().item():.4f} has_nan={torch.isnan(shared_out).any().item()}", flush=True) + print(f" L{li} SE gsa: l1={se_runner._l1_activation_global_scale:.6f} l2={se_runner._l2_activation_global_scale:.6f} gsb: l1={se_runner._l1_gsb[0].item():.6f} l2={se_runner._l2_gsb[0].item():.6f}", flush=True) return routed_out + shared_out # =====================================================================