actually handle expert param mapping

2026-05-15 06:01:50 +00:00
parent f17efa340d
commit 685bce48b4
1 changed file with 6 additions and 5 deletions
--- a/vllm/patches/deepseek_v4.py
+++ b/vllm/patches/deepseek_v4.py
@@ -211,18 +211,19 @@ class DeepseekV4FP8Config(Fp8Config):
 def make_deepseek_v4_expert_params_mapping(
    num_experts: int,
 ) -> list[tuple[str, str, int, str]]:
+    # Checkpoint uses gate_proj/up_proj/down_proj; model params use w13_ (gate_proj, up_proj) and w2_ (down_proj).
    return [
        (
            "experts.w13_" if shard_id in ("w1", "w3") else "experts.w2_",
-            f"experts.{expert_id}.{weight_name}.",
+            f"experts.{expert_id}.{ckpt_name}.",
            expert_id,
            shard_id,
        )
        for expert_id in range(num_experts)
-        for shard_id, weight_name in [
-            ("w1", "w1"),
-            ("w2", "w2"),
-            ("w3", "w3"),
+        for shard_id, ckpt_name in [
+            ("w1", "gate_proj"),
+            ("w2", "down_proj"),
+            ("w3", "up_proj"),
        ]
    ]