Nemotron: use per-layer config in NemotronHMLPDecoderLayer for heterogeneous models (#35396)

Signed-off-by: dafrimi <dafrimi@nvidia.com>
This commit is contained in:
danielafrimi
2026-02-26 23:55:19 +02:00
committed by GitHub
parent 98217b09f9
commit 832a780f3a

View File

@@ -298,6 +298,11 @@ class NemotronHMLPDecoderLayer(nn.Module):
hybrid_override_pattern = config.hybrid_override_pattern
mlp_index = hybrid_override_pattern[: layer_idx + 1].count("-") - 1
# Use the per-layer config for heterogeneous models if the config provides one
get_layer_config = getattr(config, "get_nemotron_h_config_for_layer", None)
layer_config = get_layer_config(layer_idx) if get_layer_config else config
config = layer_config
if isinstance(config.intermediate_size, list):
if len(config.intermediate_size) == 1:
intermediate_size = config.intermediate_size[0]