Fix various config related issues for Transformers v5 (#37681)

Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
This commit is contained in:
Harry Mellor
2026-03-20 16:30:12 +00:00
committed by GitHub
parent 2e089b96a8
commit 6ade4bc5a5
8 changed files with 42 additions and 21 deletions

View File

@@ -231,13 +231,14 @@ def k2_server():
"--gpu-memory-utilization",
"0.4",
] + ROCM_EXTRA_ARGS
# hack to test kimi_k2 tool use tool_id format.
# avoid error in is_deepseek_mla check by setting kv_lora_rank=null
# Test kimi_k2 tool use tool_id format by overriding model_type.
# is_deepseek_mla safely returns False via getattr when kv_lora_rank
# is absent from the underlying config.
with RemoteOpenAIServer(
MODEL_NAME,
args,
env_dict=ROCM_ENV_OVERRIDES,
override_hf_configs={"model_type": "kimi_k2", "kv_lora_rank": None},
override_hf_configs={"model_type": "kimi_k2"},
) as remote_server:
yield remote_server

View File

@@ -27,7 +27,6 @@ class ColPaliConfig(PaliGemmaConfig):
embedding_dim: int | None = None,
embed_dim: int | None = None,
dim: int | None = None,
projection_dim: int | None = None,
colbert_dim: int | None = None,
pooling: str | None = None,
vlm_config: dict | None = None,
@@ -37,7 +36,6 @@ class ColPaliConfig(PaliGemmaConfig):
self.embedding_dim = embedding_dim
self.embed_dim = embed_dim
self.dim = dim
self.projection_dim = projection_dim
self.colbert_dim = colbert_dim
self.pooling = pooling

View File

@@ -90,8 +90,6 @@ class MlpProjectorConfig(PretrainedConfig):
class DeepseekVLV2Config(PretrainedConfig):
model_type = "deepseek_vl_v2"
architectures: list[str] | None = None
vision_config: VisionEncoderConfig
projector_config: MlpProjectorConfig
tile_tag: str = "2D"
global_view_pos: str = "head"

View File

@@ -257,7 +257,6 @@ def _remap_mistral_audio_args(config: dict) -> dict:
encoder_attention_heads=encoder_args["n_heads"],
encoder_head_dim=encoder_args["head_dim"],
vocab_size=encoder_args["vocab_size"],
max_source_positions=encoder_args["max_source_positions"],
is_encoder_decoder=False, # Override WhisperConfig default
is_causal=encoder_args.get("causal", False),
sliding_window=encoder_args.get("sliding_window", None),
@@ -270,6 +269,10 @@ def _remap_mistral_audio_args(config: dict) -> dict:
max_position_embeddings=block_pool_size * config["max_position_embeddings"],
),
}
# params.json sometimes sets max_source_positions explicitly to None, which is not
# a valid value for WhisperConfig (or for downstream code that uses it), so it is
# only assigned when an actual value is present.
if (max_source_positions := encoder_args.get("max_source_positions")) is not None:
config["audio_config"].max_source_positions = max_source_positions
if quant_config:
config["quantization_config"] = quant_config
return config

View File

@@ -6,11 +6,21 @@ from transformers import ParakeetEncoderConfig, PretrainedConfig
class ParakeetConfig(ParakeetEncoderConfig):
llm_hidden_size: int
projection_hidden_size: int
projection_bias: bool
projection_eps: float = 1e-5
sampling_rate: int
def __init__(
self,
llm_hidden_size: int,
projection_hidden_size: int,
projection_bias: bool,
sampling_rate: int,
projection_eps: float = 1e-5,
**kwargs,
):
super().__init__(**kwargs)
self.llm_hidden_size = llm_hidden_size
self.projection_hidden_size = projection_hidden_size
self.projection_bias = projection_bias
self.sampling_rate = sampling_rate
self.projection_eps = projection_eps
@staticmethod
def from_hf_config(

View File

@@ -408,7 +408,6 @@ class Qwen3ASRConfig(PretrainedConfig):
support_languages=None,
**kwargs,
):
super().__init__(**kwargs)
if thinker_config is None:
thinker_config = {}
logger.info(
@@ -417,6 +416,7 @@ class Qwen3ASRConfig(PretrainedConfig):
self.thinker_config = Qwen3ASRThinkerConfig(**thinker_config)
self.support_languages = support_languages
super().__init__(**kwargs)
def get_text_config(self, decoder=False) -> "PretrainedConfig":
"""

View File

@@ -1,6 +1,7 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import os
from dataclasses import fields, is_dataclass
from typing import Any
from transformers import PretrainedConfig
@@ -15,11 +16,21 @@ class SpeculatorsConfig(PretrainedConfig):
model_type = "speculators"
def __init__(self, **kwargs):
"""In Transformers v5, `PretrainedConfig` is decorated with `dataclass` and
`huggingface_hub.dataclasses.strict(accept_kwargs=True)`.
Inheriting classes do not inherit the `accept_kwargs=True` behaviour so we must
explicitly pass any kwargs to `PretrainedConfig.__init__`."""
super().__init__(**kwargs)
# Transformers v4 - super().__init__ sets all kwargs as attributes
if not is_dataclass(PretrainedConfig):
return super().__init__(**kwargs)
# Transformers v5 - super().__init__ performs some validation before
# setting all kwargs as attributes, so we set them first to be safe
pre_trained_config_fields = {f.name for f in fields(PretrainedConfig)}
super_kwargs = dict()
for key, value in kwargs.items():
if key == "model_type":
continue # model_type is set as a class variable, so skip it here
elif key in pre_trained_config_fields:
super_kwargs[key] = value
else:
setattr(self, key, value)
super().__init__(**super_kwargs)
@classmethod
def from_pretrained(

View File

@@ -228,7 +228,7 @@ class ModelArchConfigConvertorBase:
"pangu_ultra_moe_mtp",
"bailing_hybrid",
):
return self.hf_text_config.kv_lora_rank is not None
return getattr(self.hf_text_config, "kv_lora_rank", None) is not None
elif self.hf_text_config.model_type == "eagle":
# if the model is an EAGLE module, check for the
# underlying architecture
@@ -241,7 +241,7 @@ class ModelArchConfigConvertorBase:
"deepseek_v32",
"deepseek_mtp",
)
and self.hf_text_config.kv_lora_rank is not None
and getattr(self.hf_text_config, "kv_lora_rank", None) is not None
)
return False