more fixes3
This commit is contained in:
@@ -1418,6 +1418,8 @@ class DeepseekV4Model(nn.Module):
|
||||
continue
|
||||
continue
|
||||
elif "attn_sink" in name:
|
||||
if name not in params_dict:
|
||||
continue
|
||||
narrow_weight = loaded_weight[head_rank_start:head_rank_end]
|
||||
n = narrow_weight.shape[0]
|
||||
params_dict[name][:n].copy_(narrow_weight)
|
||||
|
||||
Reference in New Issue
Block a user