From d41a48aa1fd5599614cd2ea0b3c25eb339eea779 Mon Sep 17 00:00:00 2001
From: biondizzle <biondizzle@gmail.com>
Date: Mon, 18 May 2026 23:54:02 +0000
Subject: [PATCH] Fix KeyError for missing stacked params (indexer.compressor)

Not all layers have the same indexer structure. The stacking path
was trying to access params that don't exist in params_dict, which
raised a KeyError. Add checks that skip missing stacked params
instead of raising.
---
 vllm/patches/deepseek_v4.py | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/vllm/patches/deepseek_v4.py b/vllm/patches/deepseek_v4.py
index 07046346..29f1c4f6 100644
--- a/vllm/patches/deepseek_v4.py
+++ b/vllm/patches/deepseek_v4.py
@@ -1492,6 +1492,11 @@ class DeepseekV4Model(nn.Module):
 
                 if is_pp_missing_parameter(name, self):
                     break
+                if name not in params_dict:
+                    # The stacked param doesn't exist — skip
+                    # (e.g. indexer.compressor.fused_wkv_wgate on layers
+                    # that don't have the full indexer structure)
+                    break
                 param = params_dict[name]
                 weight_loader = param.weight_loader
 
@@ -1509,6 +1514,14 @@ class DeepseekV4Model(nn.Module):
                         "weight_scale_2",
                     ))
                 )
+                if is_compressor_scale:
+                    # Verify the fused param exists before buffering
+                    if name not in params_dict:
+                        print(
+                            f"COMPRESSOR_SCALE_SKIP: {name} not in params_dict",
+                            flush=True,
+                        )
+                        break
                 if is_compressor_scale:
                     # Buffer the shard for later concatenation
                     if name not in compressor_scale_buffer: