From 6f9a400ae063e18d3fccd520ebde59fa59cb4876 Mon Sep 17 00:00:00 2001
From: biondizzle <biondizzle@gmail.com>
Date: Tue, 19 May 2026 03:58:25 +0000
Subject: [PATCH] Fix hc_head mapping: checkpoint uses hc_head.hc_fn, model
 params are flat hc_head_fn
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

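- Removed the "hc_head" → "model.hc_head" prefix mapping (checkpoint keys
  already have model.hc_head.*)
- Fixed the substr mappings: hc_head.hc_fn→hc_head_fn,
  hc_head.hc_base→hc_head_base, hc_head.hc_scale→hc_head_scale
  (previously hc_head.fn / hc_head.base / hc_head.scale)
- The model has self.hc_head_fn as a flat param, not inside a sub-module
- Tests: replaced the "Global params" cases (embed.weight, norm.weight,
  hc_head.fn) with a single hc_head.hc_fn case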
---
 tests/test_nvfp4_mapper.py  | 15 +++++++--------
 vllm/patches/deepseek_v4.py |  9 +++++----
 2 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/tests/test_nvfp4_mapper.py b/tests/test_nvfp4_mapper.py
index c9d4e303..b7610587 100644
--- a/tests/test_nvfp4_mapper.py
+++ b/tests/test_nvfp4_mapper.py
@@ -48,7 +48,8 @@ def make_nvfp4_mapper() -> WeightsMapper:
             "layers.": "model.layers.",
             "embed.": "model.embed.",
             "norm.": "model.norm.",
-            "hc_head": "model.hc_head",
+            # hc_head NOT mapped — checkpoint already has model.hc_head.*
+            # and model params are flat (hc_head_fn, not hc_head.fn)
             "mtp.": "model.mtp.",
         },
         orig_to_new_regex=expert_rename_regex,
@@ -93,9 +94,9 @@ def make_nvfp4_mapper() -> WeightsMapper:
             ".ffn_hc.fn": ".hc_ffn_fn",
             ".ffn_hc.base": ".hc_ffn_base",
             ".ffn_hc.scale": ".hc_ffn_scale",
-            "hc_head.fn": "hc_head_fn",
-            "hc_head.base": "hc_head_base",
-            "hc_head.scale": "hc_head_scale",
+            "hc_head.hc_fn": "hc_head_fn",
+            "hc_head.hc_base": "hc_head_base",
+            "hc_head.hc_scale": "hc_head_scale",
         },
     )
 
@@ -156,10 +157,8 @@ def test_mapper():
         ("layers.0.attn_hc.fn", "model.layers.0.hc_attn_fn"),
         ("layers.0.ffn_hc.scale", "model.layers.0.hc_ffn_scale"),
         
-        # Global params
-        ("embed.weight", "model.embed_tokens.weight"),
-        ("norm.weight", "model.norm.weight"),
-        ("hc_head.fn", "model.hc_head_fn"),
+        # HC head: bare key hc_head.hc_fn maps to flat hc_head_fn; no "model." prefix is added (checkpoint keys already carry it)
+        ("hc_head.hc_fn", "hc_head_fn"),
         
         # MTP (already uses ffn prefix in checkpoint)
         ("mtp.0.ffn.experts.0.w1.weight", "model.mtp.0.ffn.experts.0.w1.weight"),
diff --git a/vllm/patches/deepseek_v4.py b/vllm/patches/deepseek_v4.py
index e079de8e..0b1df975 100644
--- a/vllm/patches/deepseek_v4.py
+++ b/vllm/patches/deepseek_v4.py
@@ -1640,7 +1640,8 @@ def _make_deepseek_v4_nvfp4_weights_mapper() -> WeightsMapper:
             "layers.": "model.layers.",
             "embed.": "model.embed.",
             "norm.": "model.norm.",
-            "hc_head": "model.hc_head",
+            # hc_head NOT mapped here — checkpoint already has model.hc_head.*
+            # and model params are flat (hc_head_fn, not hc_head.fn)
             "mtp.": "model.mtp.",
         },
         orig_to_new_regex=expert_rename_regex,
@@ -1697,9 +1698,9 @@ def _make_deepseek_v4_nvfp4_weights_mapper() -> WeightsMapper:
             ".ffn_hc.fn": ".hc_ffn_fn",
             ".ffn_hc.base": ".hc_ffn_base",
             ".ffn_hc.scale": ".hc_ffn_scale",
-            "hc_head.fn": "hc_head_fn",
-            "hc_head.base": "hc_head_base",
-            "hc_head.scale": "hc_head_scale",
+            "hc_head.hc_fn": "hc_head_fn",
+            "hc_head.hc_base": "hc_head_base",
+            "hc_head.hc_scale": "hc_head_scale",
         },
     )