Print both MoE-output and residual cosine similarity for each layer

This commit is contained in:
2026-05-17 22:56:56 +00:00
parent 8637020487
commit bf99ad49ec

View File

@@ -142,10 +142,11 @@ def main():
ref_out[t] += w * (act @ down_bf16.T)
ref_hidden = ref_out + ref_hidden_saved # Residual
cos_moe = F.cosine_similarity(ref_out.flatten().unsqueeze(0), run_out.flatten().unsqueeze(0)).item()
cos = F.cosine_similarity(ref_hidden.flatten().unsqueeze(0), run_hidden.flatten().unsqueeze(0)).item()
has_nan = torch.isnan(run_hidden).any().item()
has_inf = torch.isinf(run_hidden).any().item()
print(f"Layer {layer}: cosine={cos:.6f} ref_amax={ref_hidden.amax().item():.4f} run_amax={run_hidden.amax().item():.4f} NaN={has_nan} Inf={has_inf}")
print(f"Layer {layer}: MoE_cosine={cos_moe:.6f} Residual_cosine={cos:.6f} ref_amax={ref_hidden.amax().item():.4f} run_amax={run_hidden.amax().item():.4f} NaN={has_nan} Inf={has_inf}")
if has_nan:
print(f" ❌ NaN detected after layer {layer}! Stopping.")