Print both MoE and residual cosine similarity
This commit is contained in:
@@ -142,10 +142,11 @@ def main():
|
||||
ref_out[t] += w * (act @ down_bf16.T)
|
||||
ref_hidden = ref_out + ref_hidden_saved # Residual
|
||||
|
||||
cos_moe = F.cosine_similarity(ref_out.flatten().unsqueeze(0), run_out.flatten().unsqueeze(0)).item()
|
||||
cos = F.cosine_similarity(ref_hidden.flatten().unsqueeze(0), run_hidden.flatten().unsqueeze(0)).item()
|
||||
has_nan = torch.isnan(run_hidden).any().item()
|
||||
has_inf = torch.isinf(run_hidden).any().item()
|
||||
print(f"Layer {layer}: cosine={cos:.6f} ref_amax={ref_hidden.amax().item():.4f} run_amax={run_hidden.amax().item():.4f} NaN={has_nan} Inf={has_inf}")
|
||||
print(f"Layer {layer}: MoE_cosine={cos_moe:.6f} Residual_cosine={cos:.6f} ref_amax={ref_hidden.amax().item():.4f} run_amax={run_hidden.amax().item():.4f} NaN={has_nan} Inf={has_inf}")
|
||||
|
||||
if has_nan:
|
||||
print(f" ❌ NaN detected after layer {layer}! Stopping.")
|
||||
|
||||
Reference in New Issue
Block a user