diff --git a/vllm/patches/deepseek_v4.py b/vllm/patches/deepseek_v4.py index 7b67e132..73037577 100644 --- a/vllm/patches/deepseek_v4.py +++ b/vllm/patches/deepseek_v4.py @@ -1650,17 +1650,25 @@ def _make_deepseek_v4_nvfp4_weights_mapper() -> WeightsMapper: suffix_renames = { "head.weight": "lm_head.weight", "embed.weight": "embed_tokens.weight", - ".ffn_norm.weight": ".ffn.norm_gate.norm.weight", - ".ffn.gate.weight": ".ffn.norm_gate.gate.weight", - ".ffn.gate.bias": ".ffn.norm_gate.e_score_correction_bias", - ".ffn.gate.tid2eid": ".ffn.norm_gate.tid2eid", } + # NOTE: specific renames MUST come before general ones (applied in order) substr_renames = { + # Attention projections (specific before .self_attn. → .attn.) + ".self_attn.q_a_proj.": ".attn.wq_a.", + ".self_attn.kv_proj.": ".attn.wkv.", + ".self_attn.q_b_proj.": ".attn.wq_b.", + ".self_attn.o_a_proj.": ".attn.wo_a.", + ".self_attn.o_b_proj.": ".attn.wo_b.", + ".self_attn.q_a_norm.": ".attn.q_a_norm.", + ".self_attn.kv_norm.": ".attn.kv_norm.", + ".self_attn.sinks": ".attn.sinks", ".attn.compressor.": ".attn.mla_attn.compressor.", + # Shared expert projections (specific before .mlp. → .ffn.) ".mlp.shared_experts.gate_proj.": ".ffn.shared_experts.w1.", ".mlp.shared_experts.up_proj.": ".ffn.shared_experts.w3.", ".mlp.shared_experts.down_proj.": ".ffn.shared_experts.down_proj.", + # General renames ".mlp.": ".ffn.", ".self_attn.": ".attn.", }