[Transformers backend] Ignore MTP weights when num_nextn_predict_layers=0 (#34888)
Signed-off-by: SteadfastAsArt <695488173@qq.com>
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
Co-authored-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
This commit is contained in:
@@ -300,14 +300,26 @@ class Base(
|
||||
for child_name, child_module in module.named_children():
|
||||
new_module = child_module
|
||||
qual_name = maybe_prefix(prefix, child_name)
|
||||
# Populate Eagle3 attrs
|
||||
if (
|
||||
isinstance(module, nn.ModuleList)
|
||||
and len(module) == self.text_config.num_hidden_layers
|
||||
):
|
||||
# Populate Eagle3 attrs
|
||||
self._target_class = type(child_module)
|
||||
layer_name = qual_name.removeprefix("model.")
|
||||
self._layer_names[int(child_name)] = layer_name
|
||||
# MTP weights should not be loaded into the base model
|
||||
num_hidden_layers = self.text_config.num_hidden_layers
|
||||
names = (
|
||||
"n_predict", # Override from SpeculativeConfig
|
||||
"num_nextn_predict_layers", # Most models
|
||||
"mtp_num_hidden_layers", # Qwen 3.5
|
||||
)
|
||||
n_predict = getattr_iter(self.text_config, names, 0)
|
||||
for i in range(num_hidden_layers, num_hidden_layers + n_predict):
|
||||
mtp_prefix = f"{prefix}.{i}."
|
||||
if mtp_prefix not in self.ignore_unexpected_prefixes:
|
||||
self.ignore_unexpected_prefixes.append(mtp_prefix)
|
||||
# Replace modules as needed
|
||||
if isinstance(child_module, nn.Linear):
|
||||
generator = (p for p in tp_plan if re.match(p, qual_name))
|
||||
|
||||
@@ -311,8 +311,9 @@ class AutoWeightsLoader:
|
||||
|
||||
continue
|
||||
|
||||
named_parameters = module.named_parameters(recurse=True)
|
||||
desc_param_keys = {
|
||||
base_prefix + k for k, _ in module.named_parameters(recurse=True)
|
||||
maybe_prefix(base_prefix, k) for k, _ in named_parameters
|
||||
}
|
||||
msg = (
|
||||
f"There is no module or parameter named {prefix!r} "
|
||||
|
||||
Reference in New Issue
Block a user