From c5d800f133e8f950c75ef88a8a028404ef1e1247 Mon Sep 17 00:00:00 2001
From: biondizzle <biondizzle@gmail.com>
Date: Fri, 15 May 2026 05:41:12 +0000
Subject: [PATCH] Add debug prints of weight/scale shapes and absmax before and after transform

---
 src/nvfp4_megamoe_kernel/weight_transform.py | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/src/nvfp4_megamoe_kernel/weight_transform.py b/src/nvfp4_megamoe_kernel/weight_transform.py
index 0e01033e..4e2d4696 100644
--- a/src/nvfp4_megamoe_kernel/weight_transform.py
+++ b/src/nvfp4_megamoe_kernel/weight_transform.py
@@ -79,6 +79,11 @@ def transform_nvfp4_weights_for_mega_moe(
     l1_weight, l1_weight_scale = l1_tuple
     l2_weight, l2_weight_scale = l2_tuple
 
+    # DEBUG: check weights BEFORE transform
+    print(f"[WT-IN] l1_w shape={l1_weight.shape} absmax={l1_weight.view(torch.int8).abs().max().item()} "
+          f"l1_sf shape={l1_weight_scale.shape} sf_absmax={l1_weight_scale.view(torch.uint8).abs().max().item()} "
+          f"l2_w shape={l2_weight.shape} absmax={l2_weight.view(torch.int8).abs().max().item()}")
+
     # Fold global scales into block scales
     # The logical_widths branch was wrong: it treated gs as per-projection
     # scalars and only used experts 0 and 1's scales for ALL experts.
@@ -107,4 +112,8 @@ def transform_nvfp4_weights_for_mega_moe(
     l1_sf_out = l1_sf_out.transpose(-2, -1).contiguous()
     l2_sf_out = l2_sf_out.transpose(-2, -1).contiguous()
 
+    # DEBUG: check weights AFTER transform
+    print(f"[WT-OUT] l1_w shape={l1_weight_out.shape} absmax={l1_weight_out.view(torch.int8).abs().max().item()} "
+          f"l1_sf shape={l1_sf_out.shape} sf_absmax={l1_sf_out.view(torch.uint8).abs().max().item()}")
+
     return (l1_weight_out, l1_sf_out), (l2_weight_out, l2_sf_out)