Fix NVFP4 mapper: add attention projection renames, remove norm_gate renames

- Add specific .self_attn.{q_a,kv,q_b,o_a,o_b}_proj → .attn.{wq_a,wkv,wq_b,wo_a,wo_b} renames
- Remove norm_gate suffix renames (nightly uses 'gate', not 'norm_gate')
- Order substr renames: specific before general
2026-05-18 22:53:09 +00:00
parent ea648a9bc2
commit b039123207
1 changed files with 12 additions and 4 deletions
--- a/vllm/patches/deepseek_v4.py
+++ b/vllm/patches/deepseek_v4.py
@@ -1650,17 +1650,25 @@ def _make_deepseek_v4_nvfp4_weights_mapper() -> WeightsMapper:
    suffix_renames = {
        "head.weight": "lm_head.weight",
        "embed.weight": "embed_tokens.weight",
-        ".ffn_norm.weight": ".ffn.norm_gate.norm.weight",
-        ".ffn.gate.weight": ".ffn.norm_gate.gate.weight",
-        ".ffn.gate.bias": ".ffn.norm_gate.e_score_correction_bias",
-        ".ffn.gate.tid2eid": ".ffn.norm_gate.tid2eid",
    }

+    # NOTE: specific renames MUST come before general ones (applied in order)
    substr_renames = {
+        # Attention projections (specific before .self_attn. → .attn.)
+        ".self_attn.q_a_proj.": ".attn.wq_a.",
+        ".self_attn.kv_proj.": ".attn.wkv.",
+        ".self_attn.q_b_proj.": ".attn.wq_b.",
+        ".self_attn.o_a_proj.": ".attn.wo_a.",
+        ".self_attn.o_b_proj.": ".attn.wo_b.",
+        ".self_attn.q_a_norm.": ".attn.q_a_norm.",
+        ".self_attn.kv_norm.": ".attn.kv_norm.",
+        ".self_attn.sinks": ".attn.sinks",
        ".attn.compressor.": ".attn.mla_attn.compressor.",
+        # Shared expert projections (specific before .mlp. → .ffn.)
        ".mlp.shared_experts.gate_proj.": ".ffn.shared_experts.w1.",
        ".mlp.shared_experts.up_proj.": ".ffn.shared_experts.w3.",
        ".mlp.shared_experts.down_proj.": ".ffn.shared_experts.down_proj.",
+        # General renames
        ".mlp.": ".ffn.",
        ".self_attn.": ".attn.",
    }