more debugging
This commit is contained in:
@@ -76,7 +76,7 @@ def cutlass_grouped_nvfp4_gemm(
|
||||
M_expert = token_indices.shape[0]
|
||||
|
||||
# DEBUG: verify data going into GEMM
|
||||
-if MEGA_MOE_DEBUG and e < 3 and M_expert > 0:
|
||||
+if e < 3 and M_expert > 0:
|
||||
print(f"[GEMM-IN] expert={e} M={M_expert} N={N} K={K} "
|
||||
f"w shape={expert_w.shape} w_sf shape={expert_w_sf.shape} "
|
||||
f"w absmax={expert_w.view(torch.int8).abs().max().item()} "
|
||||
|
||||
@@ -306,6 +306,15 @@ def nvfp4_mega_moe_full(
|
||||
l1_w, l1_sf = transformed_l1_weights
|
||||
l2_w, l2_sf = transformed_l2_weights
|
||||
|
||||
# Expert sanity check — are experts actually distinct?
|
||||
if not getattr(self, '_expert_sanity', False):
|
||||
self._expert_sanity = True
|
||||
for e in range(min(4, l1_w.shape[0])):
|
||||
w_sample = l1_w[e].view(torch.uint8)[:8, :8]
|
||||
sf_sample = l1_sf[e].to(torch.float32)[:4, :4]
|
||||
print(f"[EXPERT-SANITY e={e}] w_bytes[:8,:8]={w_sample.flatten().tolist()[:16]}")
|
||||
print(f"[EXPERT-SANITY e={e}] sf[:4,:4]={sf_sample.flatten().tolist()[:8]}")
|
||||
|
||||
# Step 1: Read staged activation from symm_buffer
|
||||
x_fp4 = symm_buffer.x[:num_tokens]
|
||||
x_sf = symm_buffer.x_sf[:num_tokens]
|
||||
|
||||
Reference in New Issue
Block a user