Update deprecated type hinting in vllm/transformers_utils (#18058)
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
This commit is contained in:
@@ -8,7 +8,7 @@
|
||||
""" Arctic model configuration"""
|
||||
|
||||
from dataclasses import asdict, dataclass
|
||||
from typing import Any, Dict
|
||||
from typing import Any
|
||||
|
||||
from transformers.configuration_utils import PretrainedConfig
|
||||
from transformers.utils import logging
|
||||
@@ -192,14 +192,14 @@ class ArcticConfig(PretrainedConfig):
|
||||
)
|
||||
|
||||
@classmethod
|
||||
def from_dict(cls, config_dict: Dict[str, Any], **kwargs) -> "ArcticConfig":
|
||||
def from_dict(cls, config_dict: dict[str, Any], **kwargs) -> "ArcticConfig":
|
||||
result = super().from_dict(config_dict, **kwargs)
|
||||
config = result[0] if isinstance(result, tuple) else result
|
||||
if isinstance(config.quantization, dict):
|
||||
config.quantization = ArcticQuantizationConfig(**config.quantization)
|
||||
return result
|
||||
|
||||
def to_dict(self) -> Dict[str, Any]:
|
||||
def to_dict(self) -> dict[str, Any]:
|
||||
ret = super().to_dict()
|
||||
if isinstance(ret["quantization"], ArcticQuantizationConfig):
|
||||
ret["quantization"] = asdict(ret["quantization"])
|
||||
|
||||
@@ -61,7 +61,7 @@ class Cohere2Config(PretrainedConfig):
|
||||
Whether to tie weight embeddings
|
||||
rope_theta (`float`, *optional*, defaults to 10000.0):
|
||||
The base period of the RoPE embeddings.
|
||||
rope_scaling (`Dict`, *optional*):
|
||||
rope_scaling (`dict`, *optional*):
|
||||
Dictionary containing the scaling configuration for the RoPE embeddings. NOTE: if you apply new rope type
|
||||
and you expect the model to work on longer `max_position_embeddings`, we recommend that you update this value
|
||||
accordingly.
|
||||
@@ -86,11 +86,11 @@ class Cohere2Config(PretrainedConfig):
|
||||
`beta_slow` (`float`, *optional*):
|
||||
Only used with 'yarn'. Parameter to set the boundary for interpolation (only) in the linear
|
||||
ramp function. If unspecified, it defaults to 1.
|
||||
`short_factor` (`List[float]`, *optional*):
|
||||
`short_factor` (`list[float]`, *optional*):
|
||||
Only used with 'longrope'. The scaling factor to be applied to short contexts (<
|
||||
`original_max_position_embeddings`). Must be a list of numbers with the same length as the hidden
|
||||
size divided by the number of attention heads divided by 2
|
||||
`long_factor` (`List[float]`, *optional*):
|
||||
`long_factor` (`list[float]`, *optional*):
|
||||
Only used with 'longrope'. The scaling factor to be applied to long contexts (>
|
||||
`original_max_position_embeddings`). Must be a list of numbers with the same length as the hidden
|
||||
size divided by the number of attention heads divided by 2
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
# adapted from https://github.com/deepseek-ai/DeepSeek-VL2/blob/faf18023f24b962b32d9f0a2d89e402a8d383a78/deepseek_vl2/models/modeling_deepseek_vl_v2.py#L115-L268
|
||||
from typing import Tuple
|
||||
|
||||
from transformers.configuration_utils import PretrainedConfig
|
||||
|
||||
@@ -191,12 +190,12 @@ class DeepseekVLV2Config(PretrainedConfig):
|
||||
|
||||
tile_tag: str = "2D"
|
||||
global_view_pos: str = "head"
|
||||
candidate_resolutions: Tuple[Tuple[int, int]] = ((384, 384), )
|
||||
candidate_resolutions: tuple[tuple[int, int]] = ((384, 384), )
|
||||
|
||||
def __init__(self,
|
||||
tile_tag: str = "tile_tag",
|
||||
global_view_pos: str = "head",
|
||||
candidate_resolutions: Tuple[Tuple[int,
|
||||
candidate_resolutions: tuple[tuple[int,
|
||||
int]] = ((384, 384), ),
|
||||
**kwargs):
|
||||
super().__init__(**kwargs)
|
||||
|
||||
@@ -17,14 +17,12 @@
|
||||
# limitations under the License.
|
||||
"""Exaone model configuration"""
|
||||
|
||||
from typing import Dict
|
||||
|
||||
from transformers.configuration_utils import PretrainedConfig
|
||||
from transformers.utils import logging
|
||||
|
||||
logger = logging.get_logger(__name__)
|
||||
|
||||
EXAONE_PRETRAINED_CONFIG_ARCHIVE_MAP: Dict[str, str] = {}
|
||||
EXAONE_PRETRAINED_CONFIG_ARCHIVE_MAP: dict[str, str] = {}
|
||||
|
||||
|
||||
class ExaoneConfig(PretrainedConfig):
|
||||
|
||||
@@ -98,7 +98,7 @@ class JAISConfig(PretrainedConfig):
|
||||
Scale attention weights by dividing by hidden_size instead of
|
||||
sqrt(hidden_size). Need to set scale_attn_weights to `True` as
|
||||
well.
|
||||
alibi_scaling (`Dict`, *optional*):
|
||||
alibi_scaling (`dict`, *optional*):
|
||||
Dictionary containing the scaling configuration for ALiBi
|
||||
embeddings. Currently only supports linear
|
||||
scaling strategy. Can specify either the scaling `factor` (must be
|
||||
@@ -108,7 +108,7 @@ class JAISConfig(PretrainedConfig):
|
||||
formats are `{"type": strategy name, "factor": scaling factor}` or
|
||||
`{"type": strategy name,
|
||||
"train_seq_len": training sequence length}`.
|
||||
architectures (`List`, *optional*, defaults to ['JAISLMHeadModel']):
|
||||
architectures (`list`, *optional*, defaults to ['JAISLMHeadModel']):
|
||||
architecture names for Jais.
|
||||
|
||||
Example:
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
from typing import List, Optional
|
||||
from typing import Optional
|
||||
|
||||
from transformers import PretrainedConfig
|
||||
|
||||
@@ -17,7 +17,7 @@ class MLPSpeculatorConfig(PretrainedConfig):
|
||||
emb_dim: int = 4096,
|
||||
inner_dim: int = 0,
|
||||
n_predict: int = 3,
|
||||
top_k_tokens_per_head: Optional[List[int]] = None,
|
||||
top_k_tokens_per_head: Optional[list[int]] = None,
|
||||
n_candidates: int = 5,
|
||||
tie_weights: bool = False,
|
||||
scale_input: bool = False,
|
||||
@@ -34,7 +34,7 @@ class MLPSpeculatorConfig(PretrainedConfig):
|
||||
the inner dimension of the model. If 0, will be the emb_dim.
|
||||
n_predict: int
|
||||
the number of lookaheads for the speculator
|
||||
top_k_tokens_per_head: List[int]
|
||||
top_k_tokens_per_head: list[int]
|
||||
Number of tokens to consider from each head when forming the
|
||||
candidate tree.
|
||||
For each candidate branch in the tree, head n produces topk[n]
|
||||
|
||||
@@ -4,11 +4,11 @@
|
||||
# https://huggingface.co/mosaicml/mpt-7b/blob/main/configuration_mpt.py
|
||||
"""A HuggingFace-style model configuration."""
|
||||
import warnings
|
||||
from typing import Any, Dict, Optional, Union
|
||||
from typing import Any, Optional, Union
|
||||
|
||||
from transformers import PretrainedConfig
|
||||
|
||||
attn_config_defaults: Dict = {
|
||||
attn_config_defaults: dict = {
|
||||
'attn_type': 'multihead_attention',
|
||||
'attn_pdrop': 0.0,
|
||||
'attn_impl': 'triton',
|
||||
@@ -20,8 +20,8 @@ attn_config_defaults: Dict = {
|
||||
'alibi': False,
|
||||
'alibi_bias_max': 8
|
||||
}
|
||||
ffn_config_defaults: Dict = {'ffn_type': 'mptmlp'}
|
||||
init_config_defaults: Dict = {
|
||||
ffn_config_defaults: dict = {'ffn_type': 'mptmlp'}
|
||||
init_config_defaults: dict = {
|
||||
'name': 'kaiming_normal_',
|
||||
'fan_mode': 'fan_in',
|
||||
'init_nonlinearity': 'relu',
|
||||
@@ -52,15 +52,15 @@ class MPTConfig(PretrainedConfig):
|
||||
resid_pdrop: float = 0.0,
|
||||
emb_pdrop: float = 0.0,
|
||||
learned_pos_emb: bool = True,
|
||||
attn_config: Dict = attn_config_defaults,
|
||||
ffn_config: Dict = ffn_config_defaults,
|
||||
attn_config: dict = attn_config_defaults,
|
||||
ffn_config: dict = ffn_config_defaults,
|
||||
init_device: str = 'cpu',
|
||||
logit_scale: Optional[Union[float, str]] = None,
|
||||
no_bias: bool = False,
|
||||
embedding_fraction: float = 1.0,
|
||||
norm_type: str = 'low_precision_layernorm',
|
||||
use_cache: bool = False,
|
||||
init_config: Dict = init_config_defaults,
|
||||
init_config: dict = init_config_defaults,
|
||||
fc_type: str = 'torch',
|
||||
verbose: Optional[int] = None,
|
||||
**kwargs: Any):
|
||||
@@ -102,8 +102,8 @@ class MPTConfig(PretrainedConfig):
|
||||
self._validate_config()
|
||||
|
||||
def _set_config_defaults(
|
||||
self, config: Dict[str, Any],
|
||||
config_defaults: Dict[str, Any]) -> Dict[str, Any]:
|
||||
self, config: dict[str, Any],
|
||||
config_defaults: dict[str, Any]) -> dict[str, Any]:
|
||||
for (k, v) in config_defaults.items():
|
||||
if k not in config:
|
||||
config[k] = v
|
||||
|
||||
@@ -108,7 +108,7 @@ class SolarConfig(PretrainedConfig):
|
||||
Whether to tie weight embeddings
|
||||
rope_theta (`float`, *optional*, defaults to 10000.0):
|
||||
The base period of the RoPE embeddings.
|
||||
rope_scaling (`Dict`, *optional*):
|
||||
rope_scaling (`dict`, *optional*):
|
||||
Dictionary containing the scaling configuration for
|
||||
the RoPE embeddings.
|
||||
Currently supports two scaling
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
# Adapted from https://github.com/fixie-ai/ultravox/blob/ecd58c4041030bae2ad15aa6bcf04ab43199ea02/ultravox/model/ultravox_config.py
|
||||
from typing import Any, Dict, Optional
|
||||
from typing import Any, Optional
|
||||
|
||||
import transformers
|
||||
|
||||
@@ -48,8 +48,8 @@ class UltravoxConfig(transformers.PretrainedConfig):
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
audio_config: Optional[Dict[str, Any]] = None,
|
||||
text_config: Optional[Dict[str, Any]] = None,
|
||||
audio_config: Optional[dict[str, Any]] = None,
|
||||
text_config: Optional[dict[str, Any]] = None,
|
||||
audio_model_id: Optional[str] = None,
|
||||
text_model_id: Optional[str] = None,
|
||||
ignore_index: int = -100,
|
||||
@@ -58,8 +58,8 @@ class UltravoxConfig(transformers.PretrainedConfig):
|
||||
stack_factor: int = 8,
|
||||
norm_init: float = 0.4,
|
||||
projector_act: str = "swiglu",
|
||||
text_model_lora_config: Optional[Dict[str, Any]] = None,
|
||||
audio_model_lora_config: Optional[Dict[str, Any]] = None,
|
||||
text_model_lora_config: Optional[dict[str, Any]] = None,
|
||||
audio_model_lora_config: Optional[dict[str, Any]] = None,
|
||||
projector_ln_mid: bool = False,
|
||||
**kwargs,
|
||||
):
|
||||
|
||||
Reference in New Issue
Block a user