Update deprecated type hinting in vllm/transformers_utils (#18058)

Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
Author: Harry Mellor
Date: 2025-05-13 12:34:37 +01:00
Committed by: GitHub
Parent: ff334ca1cd
Commit: 8c946cecca
17 changed files with 98 additions and 102 deletions
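
Every hunk below applies the same modernization: container annotations move from the deprecated typing.Dict / typing.List / typing.Tuple aliases to the built-in dict / list / tuple generics, which are subscriptable at runtime on Python 3.9 and newer (PEP 585). A minimal before/after sketch of the pattern; the function below is illustrative and not taken from the commit:

    # before: capitalized aliases imported from typing (deprecated since Python 3.9)
    from typing import Dict, List

    def count_tokens_old(texts: List[str]) -> Dict[str, int]:
        return {text: len(text.split()) for text in texts}

    # after: built-in generics; typing is still needed for Optional, Union, etc.
    def count_tokens_new(texts: list[str]) -> dict[str, int]:
        return {text: len(text.split()) for text in texts}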

View File

@@ -8,7 +8,7 @@
 """ Arctic model configuration"""
 from dataclasses import asdict, dataclass
-from typing import Any, Dict
+from typing import Any
 from transformers.configuration_utils import PretrainedConfig
 from transformers.utils import logging
@@ -192,14 +192,14 @@ class ArcticConfig(PretrainedConfig):
         )
     @classmethod
-    def from_dict(cls, config_dict: Dict[str, Any], **kwargs) -> "ArcticConfig":
+    def from_dict(cls, config_dict: dict[str, Any], **kwargs) -> "ArcticConfig":
         result = super().from_dict(config_dict, **kwargs)
         config = result[0] if isinstance(result, tuple) else result
         if isinstance(config.quantization, dict):
             config.quantization = ArcticQuantizationConfig(**config.quantization)
         return result
-    def to_dict(self) -> Dict[str, Any]:
+    def to_dict(self) -> dict[str, Any]:
         ret = super().to_dict()
         if isinstance(ret["quantization"], ArcticQuantizationConfig):
             ret["quantization"] = asdict(ret["quantization"])

View File

@@ -61,7 +61,7 @@ class Cohere2Config(PretrainedConfig):
             Whether to tie weight embeddings
         rope_theta (`float`, *optional*, defaults to 10000.0):
             The base period of the RoPE embeddings.
-        rope_scaling (`Dict`, *optional*):
+        rope_scaling (`dict`, *optional*):
             Dictionary containing the scaling configuration for the RoPE embeddings. NOTE: if you apply new rope type
             and you expect the model to work on longer `max_position_embeddings`, we recommend you to update this value
             accordingly.
@@ -86,11 +86,11 @@ class Cohere2Config(PretrainedConfig):
             `beta_slow` (`float`, *optional*):
                 Only used with 'yarn'. Parameter to set the boundary for interpolation (only) in the linear
                 ramp function. If unspecified, it defaults to 1.
-            `short_factor` (`List[float]`, *optional*):
+            `short_factor` (`list[float]`, *optional*):
                 Only used with 'longrope'. The scaling factor to be applied to short contexts (<
                 `original_max_position_embeddings`). Must be a list of numbers with the same length as the hidden
                 size divided by the number of attention heads divided by 2
-            `long_factor` (`List[float]`, *optional*):
+            `long_factor` (`list[float]`, *optional*):
                 Only used with 'longrope'. The scaling factor to be applied to long contexts (<
                 `original_max_position_embeddings`). Must be a list of numbers with the same length as the hidden
                 size divided by the number of attention heads divided by 2
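
For illustration, a 'longrope'-style rope_scaling dictionary of the shape this docstring describes might look like the sketch below; the hidden size, head count, factors and the rope_type key are assumptions, not values from any shipped Cohere2 checkpoint:

    # hidden size 4096 with 32 attention heads -> head_dim 128, so each factor
    # list needs 128 / 2 = 64 entries, as required above
    head_dim = 4096 // 32
    rope_scaling = {
        "rope_type": "longrope",                   # key name assumed from the usual transformers convention
        "short_factor": [1.0] * (head_dim // 2),   # applied to short contexts
        "long_factor": [4.0] * (head_dim // 2),    # applied to long contexts
        "original_max_position_embeddings": 8192,  # placeholder value
    }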

View File

@@ -1,7 +1,6 @@
 # SPDX-License-Identifier: Apache-2.0
 # adapted from https://github.com/deepseek-ai/DeepSeek-VL2/blob/faf18023f24b962b32d9f0a2d89e402a8d383a78/deepseek_vl2/models/modeling_deepseek_vl_v2.py#L115-L268
-from typing import Tuple
 from transformers.configuration_utils import PretrainedConfig
@@ -191,12 +190,12 @@ class DeepseekVLV2Config(PretrainedConfig):
     tile_tag: str = "2D"
     global_view_pos: str = "head"
-    candidate_resolutions: Tuple[Tuple[int, int]] = ((384, 384), )
+    candidate_resolutions: tuple[tuple[int, int]] = ((384, 384), )
     def __init__(self,
                  tile_tag: str = "tile_tag",
                  global_view_pos: str = "head",
-                 candidate_resolutions: Tuple[Tuple[int,
+                 candidate_resolutions: tuple[tuple[int,
                                                     int]] = ((384, 384), ),
                  **kwargs):
         super().__init__(**kwargs)
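
Only the spelling of the annotation changes here; nested built-in generics read the same way as the typing.Tuple form. A small illustration of how such a type is used; the helper and the extra resolution are hypothetical, not part of the model code:

    Resolution = tuple[int, int]  # (height, width)
    candidate_resolutions: tuple[Resolution, ...] = ((384, 384), (768, 384))

    def closest_resolution(image_hw: Resolution,
                           candidates: tuple[Resolution, ...]) -> Resolution:
        # hypothetical helper: pick the candidate whose area is nearest the image's
        target = image_hw[0] * image_hw[1]
        return min(candidates, key=lambda hw: abs(hw[0] * hw[1] - target))

    print(closest_resolution((400, 400), candidate_resolutions))  # (384, 384)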

View File

@@ -17,14 +17,12 @@
 # limitations under the License.
 """Exaone model configuration"""
-from typing import Dict
 from transformers.configuration_utils import PretrainedConfig
 from transformers.utils import logging
 logger = logging.get_logger(__name__)
-EXAONE_PRETRAINED_CONFIG_ARCHIVE_MAP: Dict[str, str] = {}
+EXAONE_PRETRAINED_CONFIG_ARCHIVE_MAP: dict[str, str] = {}
 class ExaoneConfig(PretrainedConfig):

View File

@@ -98,7 +98,7 @@ class JAISConfig(PretrainedConfig):
             Scale attention weights by dividing by hidden_size instead of
             sqrt(hidden_size). Need to set scale_attn_weights to `True` as
             well.
-        alibi_scaling (`Dict`, *optional*):
+        alibi_scaling (`dict`, *optional*):
             Dictionary containing the scaling configuration for ALiBi
             embeddings. Currently only supports linear
             scaling strategy. Can specify either the scaling `factor` (must be
@@ -108,7 +108,7 @@ class JAISConfig(PretrainedConfig):
             formats are `{"type": strategy name, "factor": scaling factor}` or
             `{"type": strategy name,
             "train_seq_len": training sequence length}`.
-        architectures (`List`, *optional*, defaults to ['JAISLMHeadModel']):
+        architectures (`list`, *optional*, defaults to ['JAISLMHeadModel']):
             architecture names for Jais.
     Example:
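
For illustration (separate from the docstring's own example), the two accepted alibi_scaling formats described above might look like this, with placeholder numbers:

    # format 1: give the scaling factor directly
    alibi_scaling_by_factor = {"type": "linear", "factor": 2.0}

    # format 2: derive the scaling from the training sequence length
    alibi_scaling_by_train_len = {"type": "linear", "train_seq_len": 2048}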

View File

@@ -1,6 +1,6 @@
 # SPDX-License-Identifier: Apache-2.0
-from typing import List, Optional
+from typing import Optional
 from transformers import PretrainedConfig
@@ -17,7 +17,7 @@ class MLPSpeculatorConfig(PretrainedConfig):
                  emb_dim: int = 4096,
                  inner_dim: int = 0,
                  n_predict: int = 3,
-                 top_k_tokens_per_head: Optional[List[int]] = None,
+                 top_k_tokens_per_head: Optional[list[int]] = None,
                  n_candidates: int = 5,
                  tie_weights: bool = False,
                  scale_input: bool = False,
@@ -34,7 +34,7 @@ class MLPSpeculatorConfig(PretrainedConfig):
                 the inner dimension of the model. If 0, will be the emb_dim.
             n_predict: int
                 the number of lookaheads for the speculator
-            top_k_tokens_per_head: List[int]
+            top_k_tokens_per_head: list[int]
                 Number of tokens to consider from each head when forming the
                 candidate tree.
                 For each candidate branch in the tree, head n produces topk[n]
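
Roughly what that per-head fan-out implies, with made-up numbers: if head n keeps topk[n] tokens, the candidate tree branches out by the product of those counts.

    import math

    top_k_tokens_per_head: list[int] = [5, 4, 3]      # placeholder values
    total_branches = math.prod(top_k_tokens_per_head)
    print(total_branches)                             # 5 * 4 * 3 = 60 candidate branches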

View File

@@ -4,11 +4,11 @@
 # https://huggingface.co/mosaicml/mpt-7b/blob/main/configuration_mpt.py
 """A HuggingFace-style model configuration."""
 import warnings
-from typing import Any, Dict, Optional, Union
+from typing import Any, Optional, Union
 from transformers import PretrainedConfig
-attn_config_defaults: Dict = {
+attn_config_defaults: dict = {
     'attn_type': 'multihead_attention',
     'attn_pdrop': 0.0,
     'attn_impl': 'triton',
@@ -20,8 +20,8 @@ attn_config_defaults: Dict = {
     'alibi': False,
     'alibi_bias_max': 8
 }
-ffn_config_defaults: Dict = {'ffn_type': 'mptmlp'}
-init_config_defaults: Dict = {
+ffn_config_defaults: dict = {'ffn_type': 'mptmlp'}
+init_config_defaults: dict = {
     'name': 'kaiming_normal_',
     'fan_mode': 'fan_in',
     'init_nonlinearity': 'relu',
@@ -52,15 +52,15 @@ class MPTConfig(PretrainedConfig):
                  resid_pdrop: float = 0.0,
                  emb_pdrop: float = 0.0,
                  learned_pos_emb: bool = True,
-                 attn_config: Dict = attn_config_defaults,
-                 ffn_config: Dict = ffn_config_defaults,
+                 attn_config: dict = attn_config_defaults,
+                 ffn_config: dict = ffn_config_defaults,
                  init_device: str = 'cpu',
                  logit_scale: Optional[Union[float, str]] = None,
                  no_bias: bool = False,
                  embedding_fraction: float = 1.0,
                  norm_type: str = 'low_precision_layernorm',
                  use_cache: bool = False,
-                 init_config: Dict = init_config_defaults,
+                 init_config: dict = init_config_defaults,
                  fc_type: str = 'torch',
                  verbose: Optional[int] = None,
                  **kwargs: Any):
@@ -102,8 +102,8 @@ class MPTConfig(PretrainedConfig):
         self._validate_config()
     def _set_config_defaults(
-            self, config: Dict[str, Any],
-            config_defaults: Dict[str, Any]) -> Dict[str, Any]:
+            self, config: dict[str, Any],
+            config_defaults: dict[str, Any]) -> dict[str, Any]:
         for (k, v) in config_defaults.items():
             if k not in config:
                 config[k] = v
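
The helper in this hunk only backfills keys the caller did not set. A standalone sketch of the same merge behaviour, detached from MPTConfig:

    from typing import Any

    def set_config_defaults(config: dict[str, Any],
                            config_defaults: dict[str, Any]) -> dict[str, Any]:
        # same idea as above: fill in only the keys that are missing
        for k, v in config_defaults.items():
            if k not in config:
                config[k] = v
        return config

    merged = set_config_defaults({"attn_pdrop": 0.1},
                                 {"attn_pdrop": 0.0, "alibi": False})
    print(merged)  # {'attn_pdrop': 0.1, 'alibi': False}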

View File

@@ -108,7 +108,7 @@ class SolarConfig(PretrainedConfig):
             Whether to tie weight embeddings
         rope_theta (`float`, *optional*, defaults to 10000.0):
             The base period of the RoPE embeddings.
-        rope_scaling (`Dict`, *optional*):
+        rope_scaling (`dict`, *optional*):
             Dictionary containing the scaling configuration for
             the RoPE embeddings.
             Currently supports two scaling
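
The excerpt is cut off before it lists the supported strategies, so only the general shape of such a dictionary is sketched below, with placeholder values:

    rope_scaling: dict = {"type": "linear", "factor": 2.0}    # placeholder strategy and factor
    effective_positions = int(4096 * rope_scaling["factor"])  # e.g. 4096 trained positions stretched to 8192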

View File

@@ -1,7 +1,7 @@
 # SPDX-License-Identifier: Apache-2.0
 # Adapted from https://github.com/fixie-ai/ultravox/blob/ecd58c4041030bae2ad15aa6bcf04ab43199ea02/ultravox/model/ultravox_config.py
-from typing import Any, Dict, Optional
+from typing import Any, Optional
 import transformers
@@ -48,8 +48,8 @@ class UltravoxConfig(transformers.PretrainedConfig):
     def __init__(
         self,
-        audio_config: Optional[Dict[str, Any]] = None,
-        text_config: Optional[Dict[str, Any]] = None,
+        audio_config: Optional[dict[str, Any]] = None,
+        text_config: Optional[dict[str, Any]] = None,
         audio_model_id: Optional[str] = None,
         text_model_id: Optional[str] = None,
         ignore_index: int = -100,
@@ -58,8 +58,8 @@ class UltravoxConfig(transformers.PretrainedConfig):
         stack_factor: int = 8,
         norm_init: float = 0.4,
         projector_act: str = "swiglu",
-        text_model_lora_config: Optional[Dict[str, Any]] = None,
-        audio_model_lora_config: Optional[Dict[str, Any]] = None,
+        text_model_lora_config: Optional[dict[str, Any]] = None,
+        audio_model_lora_config: Optional[dict[str, Any]] = None,
         projector_ln_mid: bool = False,
         **kwargs,
     ):
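
A hedged sketch of how the nested dict arguments above might be prepared; the sub-config keys are placeholders, and the commented-out constructor call only mirrors the signature in this hunk:

    from typing import Any, Optional

    # placeholder nested configs for the audio encoder and the text backbone
    audio_config: Optional[dict[str, Any]] = {"model_type": "whisper", "d_model": 1280}
    text_config: Optional[dict[str, Any]] = {"model_type": "llama", "hidden_size": 4096}

    # hypothetical usage, mirroring the __init__ signature above:
    # config = UltravoxConfig(audio_config=audio_config,
    #                         text_config=text_config,
    #                         stack_factor=8,
    #                         projector_act="swiglu")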