Fix weight loading: skip already-loaded experts correctly

This commit is contained in:
2026-05-17 18:15:51 +00:00
parent 955d7533f2
commit c1bb551446

View File

@@ -55,7 +55,7 @@ def load_expert_weights(layer_idx, num_experts):
for shard_path in shards:
with safe_open(shard_path, framework="pt", device="cpu") as f:
for e in range(num_experts):
-if len(experts) > e:
+if e < len(experts):
continue
prefix = f"model.layers.{layer_idx}.mlp.experts.{e}"
gate_w = f"{prefix}.gate_proj.weight"