diff --git a/vllm/patches/deepseek_v4.py b/vllm/patches/deepseek_v4.py index 07046346..29f1c4f6 100644 --- a/vllm/patches/deepseek_v4.py +++ b/vllm/patches/deepseek_v4.py @@ -1492,6 +1492,11 @@ class DeepseekV4Model(nn.Module): if is_pp_missing_parameter(name, self): break + if name not in params_dict: + # The stacked param doesn't exist — skip + # (e.g. indexer.compressor.fused_wkv_wgate on layers + # that don't have the full indexer structure) + break param = params_dict[name] weight_loader = param.weight_loader @@ -1509,6 +1514,14 @@ class DeepseekV4Model(nn.Module): "weight_scale_2", )) ) + if is_compressor_scale: + # Verify the fused param exists before buffering + if name not in params_dict: + print( + f"COMPRESSOR_SCALE_SKIP: {name} not in params_dict", + flush=True, + ) + break if is_compressor_scale: # Buffer the shard for later concatenation if name not in compressor_scale_buffer: