Nemotron: use per-layer config in NemotronHMLPDecoderLayer for heterogeneous models (#35396)
Signed-off-by: dafrimi <dafrimi@nvidia.com>
This commit is contained in:
@@ -298,6 +298,11 @@ class NemotronHMLPDecoderLayer(nn.Module):
|
||||
|
||||
hybrid_override_pattern = config.hybrid_override_pattern
|
||||
mlp_index = hybrid_override_pattern[: layer_idx + 1].count("-") - 1
|
||||
# Get the per-layer config for heterogeneous models, if one exists
|
||||
get_layer_config = getattr(config, "get_nemotron_h_config_for_layer", None)
|
||||
layer_config = get_layer_config(layer_idx) if get_layer_config else config
|
||||
config = layer_config
|
||||
|
||||
if isinstance(config.intermediate_size, list):
|
||||
if len(config.intermediate_size) == 1:
|
||||
intermediate_size = config.intermediate_size[0]
|
||||
|
||||
Reference in New Issue
Block a user