From 6f9a400ae063e18d3fccd520ebde59fa59cb4876 Mon Sep 17 00:00:00 2001 From: biondizzle Date: Tue, 19 May 2026 03:58:25 +0000 Subject: [PATCH] Fix hc_head mapping: checkpoint uses hc_head.hc_fn, model params are flat hc_head_fn MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Removed hc_head prefix mapping (checkpoint already has model.hc_head.*) - Fixed substr: hc_head.hc_fn→hc_head_fn (not hc_head.fn→hc_head_fn) - The model has self.hc_head_fn as flat params, not inside a sub-module --- tests/test_nvfp4_mapper.py | 15 +++++++-------- vllm/patches/deepseek_v4.py | 9 +++++---- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/tests/test_nvfp4_mapper.py b/tests/test_nvfp4_mapper.py index c9d4e303..b7610587 100644 --- a/tests/test_nvfp4_mapper.py +++ b/tests/test_nvfp4_mapper.py @@ -48,7 +48,8 @@ def make_nvfp4_mapper() -> WeightsMapper: "layers.": "model.layers.", "embed.": "model.embed.", "norm.": "model.norm.", - "hc_head": "model.hc_head", + # hc_head NOT mapped — checkpoint already has model.hc_head.* + # and model params are flat (hc_head_fn, not hc_head.fn) "mtp.": "model.mtp.", }, orig_to_new_regex=expert_rename_regex, @@ -93,9 +94,9 @@ def make_nvfp4_mapper() -> WeightsMapper: ".ffn_hc.fn": ".hc_ffn_fn", ".ffn_hc.base": ".hc_ffn_base", ".ffn_hc.scale": ".hc_ffn_scale", - "hc_head.fn": "hc_head_fn", - "hc_head.base": "hc_head_base", - "hc_head.scale": "hc_head_scale", + "hc_head.hc_fn": "hc_head_fn", + "hc_head.hc_base": "hc_head_base", + "hc_head.hc_scale": "hc_head_scale", }, ) @@ -156,10 +157,8 @@ def test_mapper(): ("layers.0.attn_hc.fn", "model.layers.0.hc_attn_fn"), ("layers.0.ffn_hc.scale", "model.layers.0.hc_ffn_scale"), - # Global params - ("embed.weight", "model.embed_tokens.weight"), - ("norm.weight", "model.norm.weight"), - ("hc_head.fn", "model.hc_head_fn"), + # HC head (checkpoint has model.hc_head.hc_fn, model params are flat hc_head_fn) + ("hc_head.hc_fn", "hc_head_fn"), # MTP (already uses ffn prefix in checkpoint) ("mtp.0.ffn.experts.0.w1.weight", "model.mtp.0.ffn.experts.0.w1.weight"), diff --git a/vllm/patches/deepseek_v4.py b/vllm/patches/deepseek_v4.py index e079de8e..0b1df975 100644 --- a/vllm/patches/deepseek_v4.py +++ b/vllm/patches/deepseek_v4.py @@ -1640,7 +1640,8 @@ def _make_deepseek_v4_nvfp4_weights_mapper() -> WeightsMapper: "layers.": "model.layers.", "embed.": "model.embed.", "norm.": "model.norm.", - "hc_head": "model.hc_head", + # hc_head NOT mapped here — checkpoint already has model.hc_head.* + # and model params are flat (hc_head_fn, not hc_head.fn) "mtp.": "model.mtp.", }, orig_to_new_regex=expert_rename_regex, @@ -1697,9 +1698,9 @@ def _make_deepseek_v4_nvfp4_weights_mapper() -> WeightsMapper: ".ffn_hc.fn": ".hc_ffn_fn", ".ffn_hc.base": ".hc_ffn_base", ".ffn_hc.scale": ".hc_ffn_scale", - "hc_head.fn": "hc_head_fn", - "hc_head.base": "hc_head_base", - "hc_head.scale": "hc_head_scale", + "hc_head.hc_fn": "hc_head_fn", + "hc_head.hc_base": "hc_head_base", + "hc_head.hc_scale": "hc_head_scale", }, )