From e6ed9facf3fa4c27578085eaf296a4158b3afb35 Mon Sep 17 00:00:00 2001
From: biondizzle <biondizzle@gmail.com>
Date: Fri, 15 May 2026 00:39:37 +0000
Subject: [PATCH] =?UTF-8?q?fix:=20indexer=20+=20shared=5Fexperts=20+=20com?=
 =?UTF-8?q?pressor=20checkpoint=E2=86=92model=20key=20renames?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Three categories of missed renames in CKPT_KEY_SUBST:

1. Shared experts: the .shared_experts.gate_proj.→.ffn.shared_experts.w1.
   rule fired, but the break after a match prevented .mlp.→.ffn. from
   also applying, producing .mlp.ffn.shared_experts.w1. (double prefix).
   Fixed by including .mlp. in the patterns. Added the missing
   .mlp.shared_experts.down_proj. rule.

2. Indexer (layers 2+): .self_attn.compressor.indexer.* was caught by the
   generic .self_attn.compressor.→.attn.mla_attn.compressor. rule, producing
   the wrong path attn.mla_attn.compressor.indexer.* instead of
   attn.indexer.*. Added indexer-specific patterns (q_b_proj→wq_b,
   kv_norm→k_norm, position_bias→compressor.ape,
   gate_proj→compressor.wgate, kv_proj→compressor.wkv) and a catch-all
   .self_attn.compressor.indexer.→.attn.indexer. rule, all placed before
   the generic compressor rule.

3. Compressor kv_proj/gate_proj: the old
   .compressor.kv_proj.→.compressor.wkv. and
   .compressor.gate_proj.→.compressor.gate. patterns could never fire
   because .self_attn.compressor. matched first (break). Merged into
   combined patterns that handle both the
   self_attn.compressor→attn.mla_attn.compressor path AND the projection
   rename in one step.
---
 vllm/patches/deepseek_v4.py | 22 ++++++++++++++++------
 1 file changed, 16 insertions(+), 6 deletions(-)

diff --git a/vllm/patches/deepseek_v4.py b/vllm/patches/deepseek_v4.py
index a6d7aa16..94c183db 100644
--- a/vllm/patches/deepseek_v4.py
+++ b/vllm/patches/deepseek_v4.py
@@ -1270,16 +1270,26 @@ class DeepseekV4Model(nn.Module):
             ".self_attn.sinks": ".attn.attn_sink",
             ".self_attn.kv_proj.": ".attn.wkv.",
             ".self_attn.kv_norm.": ".attn.kv_norm.",
+            # Indexer: self_attn.compressor.indexer → attn.indexer
+            # MUST come before the generic .self_attn.compressor. rule
+            ".self_attn.compressor.indexer.q_b_proj.": ".attn.indexer.wq_b.",
+            ".self_attn.compressor.indexer.kv_norm.": ".attn.indexer.k_norm.",
+            ".self_attn.compressor.indexer.position_bias": ".attn.indexer.compressor.ape",
+            ".self_attn.compressor.indexer.gate_proj.": ".attn.indexer.compressor.wgate.",
+            ".self_attn.compressor.indexer.kv_proj.": ".attn.indexer.compressor.wkv.",
+            ".self_attn.compressor.indexer.": ".attn.indexer.",
             # Compressor: self_attn.compressor → attn.mla_attn.compressor
+            # Compressor projections for stacking (fused_wkv_wgate)
+            ".self_attn.compressor.kv_proj.": ".attn.mla_attn.compressor.wkv.",
+            ".self_attn.compressor.gate_proj.": ".attn.mla_attn.compressor.gate.",
             ".self_attn.compressor.kv_norm.": ".attn.kv_norm.",
             ".self_attn.compressor.": ".attn.mla_attn.compressor.",
-            # Compressor projections for stacking (fused_wkv_wgate)
-            ".compressor.kv_proj.": ".compressor.wkv.",
-            ".compressor.gate_proj.": ".compressor.gate.",
             # Shared expert projections (stacking into gate_up_proj)
-            # Checkpoint has .shared_experts. but model has .ffn.shared_experts.
-            ".shared_experts.gate_proj.": ".ffn.shared_experts.w1.",
-            ".shared_experts.up_proj.": ".ffn.shared_experts.w3.",
+            # Must include .mlp. prefix since break prevents .mlp.→.ffn. from
+            # firing on the same key after these patterns match.
+            ".mlp.shared_experts.gate_proj.": ".ffn.shared_experts.w1.",
+            ".mlp.shared_experts.up_proj.": ".ffn.shared_experts.w3.",
+            ".mlp.shared_experts.down_proj.": ".ffn.shared_experts.down_proj.",
             # modelopt uses mlp, vllm uses ffn internally
             ".mlp.": ".ffn.",
         }