Fix KeyError for missing stacked params (indexer.compressor)

Not all layers have the same indexer structure. The stacking path
was trying to access params that don't exist in params_dict. Added
checks that skip missing stacked params instead of raising a KeyError.
This commit is contained in:
2026-05-18 23:54:02 +00:00
parent 4b0d8263f6
commit d41a48aa1f

View File

@@ -1492,6 +1492,11 @@ class DeepseekV4Model(nn.Module):
if is_pp_missing_parameter(name, self):
break
if name not in params_dict:
# The stacked param doesn't exist — skip
# (e.g. indexer.compressor.fused_wkv_wgate on layers
# that don't have the full indexer structure)
break
param = params_dict[name]
weight_loader = param.weight_loader
@@ -1509,6 +1514,14 @@ class DeepseekV4Model(nn.Module):
"weight_scale_2",
))
)
if is_compressor_scale:
# Verify the fused param exists before buffering
if name not in params_dict:
print(
f"COMPRESSOR_SCALE_SKIP: {name} not in params_dict",
flush=True,
)
break
if is_compressor_scale:
# Buffer the shard for later concatenation
if name not in compressor_scale_buffer: