diff --git a/vllm/patches/deepseek_v4.py b/vllm/patches/deepseek_v4.py index b71d93be..c7b08389 100644 --- a/vllm/patches/deepseek_v4.py +++ b/vllm/patches/deepseek_v4.py @@ -1418,6 +1418,8 @@ class DeepseekV4Model(nn.Module): continue continue elif "attn_sink" in name: + if name not in params_dict: + continue narrow_weight = loaded_weight[head_rank_start:head_rank_end] n = narrow_weight.shape[0] params_dict[name][:n].copy_(narrow_weight)