Fix weight loading: skip already-loaded experts correctly
This commit is contained in:
@@ -55,7 +55,7 @@ def load_expert_weights(layer_idx, num_experts):
|
||||
for shard_path in shards:
|
||||
with safe_open(shard_path, framework="pt", device="cpu") as f:
|
||||
for e in range(num_experts):
|
||||
if len(experts) > e:
|
||||
if e < len(experts):
|
||||
continue
|
||||
prefix = f"model.layers.{layer_idx}.mlp.experts.{e}"
|
||||
gate_w = f"{prefix}.gate_proj.weight"
|
||||
|
||||
Reference in New Issue
Block a user