Standardise get_rope to use rope_parameters["partial_rotary_factor"], not rotary_dim (#30389)
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
This commit is contained in:
@@ -42,9 +42,10 @@ class GteNewModelConfig(VerifyAndUpdateConfig):
         config.hidden_act = "geglu"
 
         head_dim = config.hidden_size // config.num_attention_heads
+        rotary_dim = getattr(config, "rotary_emb_dim", head_dim)
+        config.rope_parameters["partial_rotary_factor"] = rotary_dim / head_dim
         config.rotary_kwargs = {
             "head_size": head_dim,
-            "rotary_dim": getattr(config, "rotary_emb_dim", head_dim),
             "max_position": config.max_position_embeddings,
             "rope_parameters": config.rope_parameters,
         }
@@ -77,9 +78,11 @@ class JinaRobertaModelConfig(VerifyAndUpdateConfig):
         if not model_config.enforce_eager:
             max_position = round_up(max_position, 8)
 
+        rotary_dim = getattr(config, "rotary_emb_dim", head_dim)
+        config.rope_parameters["partial_rotary_factor"] = rotary_dim / head_dim
+
         config.rotary_kwargs = {
             "head_size": head_dim,
-            "rotary_dim": getattr(config, "rotary_emb_dim", head_dim),
             "max_position": max_position,
             "rope_parameters": config.rope_parameters,
         }
@@ -113,12 +116,10 @@ class NomicBertModelConfig(VerifyAndUpdateConfig):
         config.num_hidden_layers = config.n_layer
 
         head_dim = config.hidden_size // config.num_attention_heads
-        rotary_emb_dim = int(head_dim * config.rotary_emb_fraction)
         max_trained_positions = getattr(config, "max_trained_positions", 2048)
 
         config.rotary_kwargs = {
             "head_size": head_dim,
-            "rotary_dim": rotary_emb_dim,
             "max_position": max_trained_positions,
             "rope_parameters": config.rope_parameters,
         }
@@ -240,9 +241,10 @@ class SnowflakeGteNewModelConfig(VerifyAndUpdateConfig):
         config.hidden_act = "geglu"
 
         head_dim = config.hidden_size // config.num_attention_heads
+        rotary_dim = getattr(config, "rotary_emb_dim", head_dim)
+        config.rope_parameters["partial_rotary_factor"] = rotary_dim / head_dim
         config.rotary_kwargs = {
             "head_size": head_dim,
-            "rotary_dim": getattr(config, "rotary_emb_dim", head_dim),
             "max_position": config.max_position_embeddings,
             "rope_parameters": config.rope_parameters,
         }
Reference in New Issue
Block a user