From 685bce48b46ea1d6318911d24cb8a31f86c12803 Mon Sep 17 00:00:00 2001
From: biondizzle
Date: Fri, 15 May 2026 06:01:50 +0000
Subject: [PATCH] actually handle expert param mapping

---
 vllm/patches/deepseek_v4.py | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/vllm/patches/deepseek_v4.py b/vllm/patches/deepseek_v4.py
index 2c54fcc8..4b55e095 100644
--- a/vllm/patches/deepseek_v4.py
+++ b/vllm/patches/deepseek_v4.py
@@ -211,18 +211,19 @@ class DeepseekV4FP8Config(Fp8Config):
 def make_deepseek_v4_expert_params_mapping(
     num_experts: int,
 ) -> list[tuple[str, str, int, str]]:
+    # Checkpoint uses gate_proj/up_proj/down_proj, model params use w13_/w2_
     return [
         (
             "experts.w13_" if shard_id in ("w1", "w3") else "experts.w2_",
-            f"experts.{expert_id}.{weight_name}.",
+            f"experts.{expert_id}.{ckpt_name}.",
             expert_id,
             shard_id,
         )
         for expert_id in range(num_experts)
-        for shard_id, weight_name in [
-            ("w1", "w1"),
-            ("w2", "w2"),
-            ("w3", "w3"),
+        for shard_id, ckpt_name in [
+            ("w1", "gate_proj"),
+            ("w2", "down_proj"),
+            ("w3", "up_proj"),
         ]
     ]
 