Fix KeyError for missing stacked params (indexer.compressor)
Not all layers have the same indexer structure. The stacking path was trying to access params that don't exist in params_dict, which raised a KeyError. Added checks that skip missing stacked params instead of raising.
This commit is contained in:
@@ -1492,6 +1492,11 @@ class DeepseekV4Model(nn.Module):
|
||||
|
||||
if is_pp_missing_parameter(name, self):
|
||||
break
|
||||
if name not in params_dict:
|
||||
# The stacked param doesn't exist — skip
|
||||
# (e.g. indexer.compressor.fused_wkv_wgate on layers
|
||||
# that don't have the full indexer structure)
|
||||
break
|
||||
param = params_dict[name]
|
||||
weight_loader = param.weight_loader
|
||||
|
||||
@@ -1509,6 +1514,14 @@ class DeepseekV4Model(nn.Module):
|
||||
"weight_scale_2",
|
||||
))
|
||||
)
|
||||
if is_compressor_scale:
|
||||
# Verify the fused param exists before buffering
|
||||
if name not in params_dict:
|
||||
print(
|
||||
f"COMPRESSOR_SCALE_SKIP: {name} not in params_dict",
|
||||
flush=True,
|
||||
)
|
||||
break
|
||||
if is_compressor_scale:
|
||||
# Buffer the shard for later concatenation
|
||||
if name not in compressor_scale_buffer:
|
||||
|
||||
Reference in New Issue
Block a user