diff --git a/single_shot_inference.py b/single_shot_inference.py index 4f87c086..1f47d0c1 100644 --- a/single_shot_inference.py +++ b/single_shot_inference.py @@ -444,8 +444,8 @@ def forward_layer(X_l, w, li, cfg, rope_cos, rope_sin, # ===================================================================== def _load_moe_weights_stacked(all_w, li, pfx, dev, moe, cfg): n_e = cfg["n_routed_experts"] - l1_fp4_list, l1_sf_list, l1_gs_list, l1_ws2_list = [], [], [], [] - l2_fp4_list, l2_sf_list, l2_gs_list, l2_ws2_list = [], [], [], [] + l1_fp4_list, l1_sf_list, l1_gs_list, l1_ws2_list, l1_gsa_list = [], [], [], [], [] + l2_fp4_list, l2_sf_list, l2_gs_list, l2_ws2_list, l2_gsa_list = [], [], [], [], [] for eid in range(n_e): ep = f"{pfx}.experts.{eid}" gw, gws, gws2, gisc = get_nvfp4_weight(all_w, ep, 'gate_proj')