From 2cbc7459b0cd8513c346cf2afa7e0f755d85ff86 Mon Sep 17 00:00:00 2001 From: biondizzle Date: Mon, 1 Jun 2026 04:14:47 +0000 Subject: [PATCH] diag: fix SE scale print (cast to float first) --- single_shot_inference.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/single_shot_inference.py b/single_shot_inference.py index 8233b7fe..245057b5 100644 --- a/single_shot_inference.py +++ b/single_shot_inference.py @@ -429,8 +429,8 @@ def moe_forward(x, li, moe_runner, se_runner, router, token_id): wb = se_runner._l1_mat_b.view(torch.uint8) print(f" L{li} SE l1 weight: shape={list(se_runner._l1_mat_b.shape)} dtype={se_runner._l1_mat_b.dtype} uint8_range=[{wb.min().item()},{wb.max().item()}]", flush=True) if hasattr(se_runner, '_l1_scale_b') and se_runner._l1_scale_b is not None: - sb = se_runner._l1_scale_b - print(f" L{li} SE l1 scale: shape={list(sb.shape)} dtype={sb.dtype} range=[{sb.min().item():.6f},{sb.max().item():.6f}] has_nan={torch.isnan(sb).any().item()}", flush=True) + sb = se_runner._l1_scale_b.float() + print(f" L{li} SE l1 scale: shape={list(se_runner._l1_scale_b.shape)} dtype={se_runner._l1_scale_b.dtype} float_range=[{sb.min().item():.6f},{sb.max().item():.6f}] has_nan={torch.isnan(sb).any().item()}", flush=True) print(f" L{li} SE gsa: l1={se_runner._l1_activation_global_scale:.6f} l2={se_runner._l2_activation_global_scale:.6f} gsb: l1={se_runner._l1_gsb[0].item():.6f} l2={se_runner._l2_gsb[0].item():.6f}", flush=True) return routed_out + shared_out