From 8904d409f800c99cdf625762a279bbee12efd717 Mon Sep 17 00:00:00 2001
From: biondizzle <biondizzle@gmail.com>
Date: Tue, 19 May 2026 18:32:49 +0000
Subject: [PATCH] MoE test: list available weight keys when loading fails

---
 tests/test_moe_nan_b200.py | 25 +++++++++++++++----------
 1 file changed, 15 insertions(+), 10 deletions(-)

diff --git a/tests/test_moe_nan_b200.py b/tests/test_moe_nan_b200.py
index e447fac6..43f79c7d 100644
--- a/tests/test_moe_nan_b200.py
+++ b/tests/test_moe_nan_b200.py
@@ -66,16 +66,21 @@ def test_moe_layer(layer_id=2):
     emb = G("model.embed_tokens.weight")
     fnorm = G(f"{p}.post_attention_layernorm.weight")
     
-    # MoE weights
-    # Gate/up (w13): (E, 2*intermediate, hidden//2) uint8
-    # Down (w2): (E, hidden, intermediate//2) uint8
-    w13_w = G(f"{m}.experts.w13_weight")  # or gate_proj + up_proj
-    w13_sf = G(f"{m}.experts.w13_weight_scale")
-    w13_gs = G(f"{m}.experts.w13_weight_scale_2")
-    w2_w = G(f"{m}.experts.w2_weight")
-    w2_sf = G(f"{m}.experts.w2_weight_scale")
-    w2_gs = G(f"{m}.experts.w2_weight_scale_2")
-    swiglu_limit = None
+    # MoE weights — NVFP4 packed format; on load failure, list this layer's keys and bail out
+    try:
+        w13_w = G(f"{m}.experts.w13_weight")  # gate/up; was documented as (E, 2*intermediate, hidden//2) uint8 — TODO confirm
+        w13_sf = G(f"{m}.experts.w13_weight_scale")
+        w13_gs = G(f"{m}.experts.w13_weight_scale_2")
+        w2_w = G(f"{m}.experts.w2_weight")  # down; was documented as (E, hidden, intermediate//2) uint8 — TODO confirm
+        w2_sf = G(f"{m}.experts.w2_weight_scale")
+        w2_gs = G(f"{m}.experts.w2_weight_scale_2")
+    except (KeyError, RuntimeError) as e:
+        print(f"  ERROR: Could not load MoE weights: {e}")
+        print("  Available keys for this layer:")
+        for k in sorted(wm.keys()):
+            if f"layers.{layer_id}.mlp" in k:  # follow the layer_id argument instead of a fixed layer 2
+                print(f"    {k}")
+        return
     
     # Shared expert
     se_gate_w = G(f"{m}.shared_experts.gate_proj.weight")