Update rope_scaling to rope_parameters in preparation for Transformers v5 (#28542)

Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
This commit is contained in:
Harry Mellor
2025-11-19 18:06:36 +01:00
committed by GitHub
parent d44e9df7d4
commit a8b70304d6
104 changed files with 542 additions and 910 deletions

View File

@@ -26,7 +26,6 @@
from collections.abc import Iterable
from itertools import islice
from typing import Any
import torch
from torch import nn
@@ -120,8 +119,6 @@ class LlamaAttention(nn.Module):
hidden_size: int,
num_heads: int,
num_kv_heads: int,
rope_theta: float = 10000,
rope_scaling: dict[str, Any] | None = None,
max_position_embeddings: int = 8192,
quant_config: QuantizationConfig | None = None,
bias: bool = False,
@@ -157,7 +154,6 @@ class LlamaAttention(nn.Module):
self.q_size = self.num_heads * self.head_dim
self.kv_size = self.num_kv_heads * self.head_dim
self.scaling = self.head_dim**-0.5
self.rope_theta = rope_theta
self.max_position_embeddings = max_position_embeddings
llama_4_scaling_config = getattr(config, "llama_4_scaling", None)
@@ -186,9 +182,7 @@ class LlamaAttention(nn.Module):
prefix=f"{prefix}.o_proj",
)
self._init_rotary_emb(
config, rope_scaling=rope_scaling, quant_config=quant_config
)
self._init_rotary_emb(config, quant_config=quant_config)
sliding_window = None
if layer_types := getattr(config, "layer_types", None):
@@ -258,7 +252,6 @@ class LlamaAttention(nn.Module):
def _init_rotary_emb(
self,
config: LlamaConfig,
rope_scaling: dict[str, Any] | None,
quant_config: QuantizationConfig | None,
) -> None:
is_neox_style = True
@@ -270,8 +263,7 @@ class LlamaAttention(nn.Module):
self.head_dim,
rotary_dim=self.head_dim,
max_position=self.max_position_embeddings,
base=self.rope_theta,
rope_scaling=rope_scaling,
rope_parameters=config.rope_parameters,
is_neox_style=is_neox_style,
partial_rotary_factor=self.partial_rotary_factor,
)
@@ -291,14 +283,6 @@ class LlamaDecoderLayer(nn.Module):
quant_config = self.get_quant_config(vllm_config)
self.hidden_size = config.hidden_size
rope_theta = getattr(config, "rope_theta", 10000)
rope_scaling = getattr(config, "rope_scaling", None)
if rope_scaling is not None and getattr(
config, "original_max_position_embeddings", None
):
rope_scaling["original_max_position_embeddings"] = (
config.original_max_position_embeddings
)
max_position_embeddings = getattr(config, "max_position_embeddings", 8192)
# Support abacusai/Smaug-72B-v0.1 with attention_bias
# Support internlm/internlm-7b with bias
@@ -326,8 +310,6 @@ class LlamaDecoderLayer(nn.Module):
num_kv_heads=getattr(
config, "num_key_value_heads", config.num_attention_heads
),
rope_theta=rope_theta,
rope_scaling=rope_scaling,
max_position_embeddings=max_position_embeddings,
quant_config=quant_config,
bias=attention_bias,