From 7b3a85346513dd0856be8859ffef06e602cf4dcc Mon Sep 17 00:00:00 2001 From: biondizzle Date: Fri, 15 May 2026 07:10:13 +0000 Subject: [PATCH] more debugging --- src/nvfp4_megamoe_kernel/cutlass_nvfp4_gemm/kernel.py | 2 +- src/nvfp4_megamoe_kernel/nvfp4_mega_moe.py | 9 +++++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/src/nvfp4_megamoe_kernel/cutlass_nvfp4_gemm/kernel.py b/src/nvfp4_megamoe_kernel/cutlass_nvfp4_gemm/kernel.py index 5c8b3ab2..f1262d77 100644 --- a/src/nvfp4_megamoe_kernel/cutlass_nvfp4_gemm/kernel.py +++ b/src/nvfp4_megamoe_kernel/cutlass_nvfp4_gemm/kernel.py @@ -76,7 +76,7 @@ def cutlass_grouped_nvfp4_gemm( M_expert = token_indices.shape[0] # DEBUG: verify data going into GEMM - if MEGA_MOE_DEBUG and e < 3 and M_expert > 0: + if e < 3 and M_expert > 0: print(f"[GEMM-IN] expert={e} M={M_expert} N={N} K={K} " f"w shape={expert_w.shape} w_sf shape={expert_w_sf.shape} " f"w absmax={expert_w.view(torch.int8).abs().max().item()} " diff --git a/src/nvfp4_megamoe_kernel/nvfp4_mega_moe.py b/src/nvfp4_megamoe_kernel/nvfp4_mega_moe.py index 63ab1c21..971f33a2 100644 --- a/src/nvfp4_megamoe_kernel/nvfp4_mega_moe.py +++ b/src/nvfp4_megamoe_kernel/nvfp4_mega_moe.py @@ -306,6 +306,15 @@ def nvfp4_mega_moe_full( l1_w, l1_sf = transformed_l1_weights l2_w, l2_sf = transformed_l2_weights + # Expert sanity check — are experts actually distinct? + if not getattr(self, '_expert_sanity', False): + self._expert_sanity = True + for e in range(min(4, l1_w.shape[0])): + w_sample = l1_w[e].view(torch.uint8)[:8, :8] + sf_sample = l1_sf[e].to(torch.float32)[:4, :4] + print(f"[EXPERT-SANITY e={e}] w_bytes[:8,:8]={w_sample.flatten().tolist()[:16]}") + print(f"[EXPERT-SANITY e={e}] sf[:4,:4]={sf_sample.flatten().tolist()[:8]}") + # Step 1: Read staged activation from symm_buffer x_fp4 = symm_buffer.x[:num_tokens] x_sf = symm_buffer.x_sf[:num_tokens]