From b039123207b3f320cd69f6a05e81babb40bb5ee7 Mon Sep 17 00:00:00 2001 From: biondizzle Date: Mon, 18 May 2026 22:53:09 +0000 Subject: [PATCH] Fix NVFP4 mapper: add attention projection renames, remove norm_gate renames MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add specific .self_attn.{q_a,kv,q_b,o_a,o_b}_proj → .attn.{wq_a,wkv,wq_b,wo_a,wo_b} - Remove norm_gate suffix renames (nightly uses 'gate' not 'norm_gate') - Order substr renames: specific before general --- vllm/patches/deepseek_v4.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/vllm/patches/deepseek_v4.py b/vllm/patches/deepseek_v4.py index 7b67e132..73037577 100644 --- a/vllm/patches/deepseek_v4.py +++ b/vllm/patches/deepseek_v4.py @@ -1650,17 +1650,25 @@ def _make_deepseek_v4_nvfp4_weights_mapper() -> WeightsMapper: suffix_renames = { "head.weight": "lm_head.weight", "embed.weight": "embed_tokens.weight", - ".ffn_norm.weight": ".ffn.norm_gate.norm.weight", - ".ffn.gate.weight": ".ffn.norm_gate.gate.weight", - ".ffn.gate.bias": ".ffn.norm_gate.e_score_correction_bias", - ".ffn.gate.tid2eid": ".ffn.norm_gate.tid2eid", } + # NOTE: specific renames MUST come before general ones (applied in order) substr_renames = { + # Attention projections (specific before .self_attn. → .attn.) + ".self_attn.q_a_proj.": ".attn.wq_a.", + ".self_attn.kv_proj.": ".attn.wkv.", + ".self_attn.q_b_proj.": ".attn.wq_b.", + ".self_attn.o_a_proj.": ".attn.wo_a.", + ".self_attn.o_b_proj.": ".attn.wo_b.", + ".self_attn.q_a_norm.": ".attn.q_a_norm.", + ".self_attn.kv_norm.": ".attn.kv_norm.", + ".self_attn.sinks": ".attn.sinks", ".attn.compressor.": ".attn.mla_attn.compressor.", + # Shared expert projections (specific before .mlp. → .ffn.) ".mlp.shared_experts.gate_proj.": ".ffn.shared_experts.w1.", ".mlp.shared_experts.up_proj.": ".ffn.shared_experts.w3.", ".mlp.shared_experts.down_proj.": ".ffn.shared_experts.down_proj.", + # General renames ".mlp.": ".ffn.", ".self_attn.": ".attn.", }