Fix various config-related issues for Transformers v5 (#37681)
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
This commit is contained in:
@@ -231,13 +231,14 @@ def k2_server():
|
||||
"--gpu-memory-utilization",
|
||||
"0.4",
|
||||
] + ROCM_EXTRA_ARGS
|
||||
# hack to test kimi_k2 tool use tool_id format.
|
||||
# avoid error in is_deepseek_mla check by setting kv_lora_rank=null
|
||||
# Test kimi_k2 tool use tool_id format by overriding model_type.
|
||||
# is_deepseek_mla safely returns False via getattr when kv_lora_rank
|
||||
# is absent from the underlying config.
|
||||
with RemoteOpenAIServer(
|
||||
MODEL_NAME,
|
||||
args,
|
||||
env_dict=ROCM_ENV_OVERRIDES,
|
||||
override_hf_configs={"model_type": "kimi_k2", "kv_lora_rank": None},
|
||||
override_hf_configs={"model_type": "kimi_k2"},
|
||||
) as remote_server:
|
||||
yield remote_server
|
||||
|
||||
|
||||
@@ -27,7 +27,6 @@ class ColPaliConfig(PaliGemmaConfig):
|
||||
embedding_dim: int | None = None,
|
||||
embed_dim: int | None = None,
|
||||
dim: int | None = None,
|
||||
projection_dim: int | None = None,
|
||||
colbert_dim: int | None = None,
|
||||
pooling: str | None = None,
|
||||
vlm_config: dict | None = None,
|
||||
@@ -37,7 +36,6 @@ class ColPaliConfig(PaliGemmaConfig):
|
||||
self.embedding_dim = embedding_dim
|
||||
self.embed_dim = embed_dim
|
||||
self.dim = dim
|
||||
self.projection_dim = projection_dim
|
||||
self.colbert_dim = colbert_dim
|
||||
self.pooling = pooling
|
||||
|
||||
|
||||
@@ -90,8 +90,6 @@ class MlpProjectorConfig(PretrainedConfig):
|
||||
class DeepseekVLV2Config(PretrainedConfig):
|
||||
model_type = "deepseek_vl_v2"
|
||||
architectures: list[str] | None = None
|
||||
vision_config: VisionEncoderConfig
|
||||
projector_config: MlpProjectorConfig
|
||||
|
||||
tile_tag: str = "2D"
|
||||
global_view_pos: str = "head"
|
||||
|
||||
@@ -257,7 +257,6 @@ def _remap_mistral_audio_args(config: dict) -> dict:
|
||||
encoder_attention_heads=encoder_args["n_heads"],
|
||||
encoder_head_dim=encoder_args["head_dim"],
|
||||
vocab_size=encoder_args["vocab_size"],
|
||||
max_source_positions=encoder_args["max_source_positions"],
|
||||
is_encoder_decoder=False, # Override WhisperConfig default
|
||||
is_causal=encoder_args.get("causal", False),
|
||||
sliding_window=encoder_args.get("sliding_window", None),
|
||||
@@ -270,6 +269,10 @@ def _remap_mistral_audio_args(config: dict) -> dict:
|
||||
max_position_embeddings=block_pool_size * config["max_position_embeddings"],
|
||||
),
|
||||
}
|
||||
# Sometimes max_source_positions is explicitly set to None in params.json but this
|
||||
# is not a valid value for WhisperConfig (or downstream code that uses it).
|
||||
if (max_source_positions := encoder_args.get("max_source_positions")) is not None:
|
||||
config["audio_config"].max_source_positions = max_source_positions
|
||||
if quant_config:
|
||||
config["quantization_config"] = quant_config
|
||||
return config
|
||||
|
||||
@@ -6,11 +6,21 @@ from transformers import ParakeetEncoderConfig, PretrainedConfig
|
||||
|
||||
|
||||
class ParakeetConfig(ParakeetEncoderConfig):
|
||||
llm_hidden_size: int
|
||||
projection_hidden_size: int
|
||||
projection_bias: bool
|
||||
projection_eps: float = 1e-5
|
||||
sampling_rate: int
|
||||
def __init__(
|
||||
self,
|
||||
llm_hidden_size: int,
|
||||
projection_hidden_size: int,
|
||||
projection_bias: bool,
|
||||
sampling_rate: int,
|
||||
projection_eps: float = 1e-5,
|
||||
**kwargs,
|
||||
):
|
||||
super().__init__(**kwargs)
|
||||
self.llm_hidden_size = llm_hidden_size
|
||||
self.projection_hidden_size = projection_hidden_size
|
||||
self.projection_bias = projection_bias
|
||||
self.sampling_rate = sampling_rate
|
||||
self.projection_eps = projection_eps
|
||||
|
||||
@staticmethod
|
||||
def from_hf_config(
|
||||
|
||||
@@ -408,7 +408,6 @@ class Qwen3ASRConfig(PretrainedConfig):
|
||||
support_languages=None,
|
||||
**kwargs,
|
||||
):
|
||||
super().__init__(**kwargs)
|
||||
if thinker_config is None:
|
||||
thinker_config = {}
|
||||
logger.info(
|
||||
@@ -417,6 +416,7 @@ class Qwen3ASRConfig(PretrainedConfig):
|
||||
|
||||
self.thinker_config = Qwen3ASRThinkerConfig(**thinker_config)
|
||||
self.support_languages = support_languages
|
||||
super().__init__(**kwargs)
|
||||
|
||||
def get_text_config(self, decoder=False) -> "PretrainedConfig":
|
||||
"""
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||
import os
|
||||
from dataclasses import fields, is_dataclass
|
||||
from typing import Any
|
||||
|
||||
from transformers import PretrainedConfig
|
||||
@@ -15,11 +16,21 @@ class SpeculatorsConfig(PretrainedConfig):
|
||||
model_type = "speculators"
|
||||
|
||||
def __init__(self, **kwargs):
|
||||
"""In Transformers v5, `PretrainedConfig` is decorated with `dataclass` and
|
||||
`huggingface_hub.dataclasses.strict(accept_kwargs=True)`.
|
||||
Inheriting classes do not inherit the `accept_kwargs=True` behaviour so we must
|
||||
explicitly pass any kwargs to `PretrainedConfig.__init__`."""
|
||||
super().__init__(**kwargs)
|
||||
# Transformers v4 - super().__init__ which sets all kwargs as attributes
|
||||
if not is_dataclass(PretrainedConfig):
|
||||
return super().__init__(**kwargs)
|
||||
# Transformers v5 - super().__init__ performs some validation before
|
||||
# setting all kwargs as attributes, so we set them first to be safe
|
||||
pre_trained_config_fields = {f.name for f in fields(PretrainedConfig)}
|
||||
super_kwargs = dict()
|
||||
for key, value in kwargs.items():
|
||||
if key == "model_type":
|
||||
continue # model_type is set as a class variable, so skip it here
|
||||
elif key in pre_trained_config_fields:
|
||||
super_kwargs[key] = value
|
||||
else:
|
||||
setattr(self, key, value)
|
||||
super().__init__(**super_kwargs)
|
||||
|
||||
@classmethod
|
||||
def from_pretrained(
|
||||
|
||||
@@ -228,7 +228,7 @@ class ModelArchConfigConvertorBase:
|
||||
"pangu_ultra_moe_mtp",
|
||||
"bailing_hybrid",
|
||||
):
|
||||
return self.hf_text_config.kv_lora_rank is not None
|
||||
return getattr(self.hf_text_config, "kv_lora_rank", None) is not None
|
||||
elif self.hf_text_config.model_type == "eagle":
|
||||
# if the model is an EAGLE module, check for the
|
||||
# underlying architecture
|
||||
@@ -241,7 +241,7 @@ class ModelArchConfigConvertorBase:
|
||||
"deepseek_v32",
|
||||
"deepseek_mtp",
|
||||
)
|
||||
and self.hf_text_config.kv_lora_rank is not None
|
||||
and getattr(self.hf_text_config, "kv_lora_rank", None) is not None
|
||||
)
|
||||
return False
|
||||
|
||||
|
||||
Reference in New Issue
Block a user