From 086f3fa5c51fcd031c1baf725df2f5c4207125fe Mon Sep 17 00:00:00 2001 From: biondizzle Date: Fri, 15 May 2026 01:29:00 +0000 Subject: [PATCH] =?UTF-8?q?fix:=20hc=20params=20dot=E2=86=92underscore=20+?= =?UTF-8?q?=20compressor=20position=5Fbias=E2=86=92ape=20combined=20rule?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two fixes: 1. attn_hc.base → hc_attn_base (underscore not dot before base/fn/scale) Same for fn, scale, and ffn_hc variants. 2. compressor.position_bias → compressor.ape was never firing because the .self_attn.compressor. rule matched first (break). Added combined .self_attn.compressor.position_bias → .attn.mla_attn.compressor.ape. --- vllm/patches/deepseek_v4.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/vllm/patches/deepseek_v4.py b/vllm/patches/deepseek_v4.py index 1854f922..8e2f187b 100644 --- a/vllm/patches/deepseek_v4.py +++ b/vllm/patches/deepseek_v4.py @@ -1283,6 +1283,7 @@ class DeepseekV4Model(nn.Module): ".self_attn.compressor.kv_proj.": ".attn.mla_attn.compressor.wkv.", ".self_attn.compressor.gate_proj.": ".attn.mla_attn.compressor.gate.", ".self_attn.compressor.kv_norm.": ".attn.kv_norm.", + ".self_attn.compressor.position_bias": ".attn.mla_attn.compressor.ape", ".self_attn.compressor.": ".attn.mla_attn.compressor.", # Shared expert projections (stacking into gate_up_proj) # Must include .mlp. prefix since break prevents .mlp.→.ffn. 
from @@ -1290,9 +1291,15 @@ class DeepseekV4Model(nn.Module): ".mlp.shared_experts.gate_proj.": ".ffn.shared_experts.w1.", ".mlp.shared_experts.up_proj.": ".ffn.shared_experts.w3.", ".mlp.shared_experts.down_proj.": ".ffn.shared_experts.down_proj.", - # Hadamard coding params - ".attn_hc.": ".hc_attn.", - ".ffn_hc.": ".hc_ffn.", + # Hadamard coding params: checkpoint has .attn_hc.base/fn/scale + # and .ffn_hc.base/fn/scale; model has hc_attn_base/fn/scale + # and hc_ffn_base/fn/scale (underscore not dot before base/fn/scale; leading dot kept) + ".attn_hc.base": ".hc_attn_base", + ".attn_hc.fn": ".hc_attn_fn", + ".attn_hc.scale": ".hc_attn_scale", + ".ffn_hc.base": ".hc_ffn_base", + ".ffn_hc.fn": ".hc_ffn_fn", + ".ffn_hc.scale": ".hc_ffn_scale", "hc_head.hc_base": "hc_head_base", "hc_head.hc_fn": "hc_head_fn", "hc_head.hc_scale": "hc_head_scale",