Fix various config-related issues for Transformers v5 (#37681)
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
This commit is contained in:
@@ -231,13 +231,14 @@ def k2_server():
|
|||||||
"--gpu-memory-utilization",
|
"--gpu-memory-utilization",
|
||||||
"0.4",
|
"0.4",
|
||||||
] + ROCM_EXTRA_ARGS
|
] + ROCM_EXTRA_ARGS
|
||||||
# hack to test kimi_k2 tool use tool_id format.
|
# Test kimi_k2 tool use tool_id format by overriding model_type.
|
||||||
# avoid error in is_deepseek_mla check by setting kv_lora_rank=null
|
# is_deepseek_mla safely returns False via getattr when kv_lora_rank
|
||||||
|
# is absent from the underlying config.
|
||||||
with RemoteOpenAIServer(
|
with RemoteOpenAIServer(
|
||||||
MODEL_NAME,
|
MODEL_NAME,
|
||||||
args,
|
args,
|
||||||
env_dict=ROCM_ENV_OVERRIDES,
|
env_dict=ROCM_ENV_OVERRIDES,
|
||||||
override_hf_configs={"model_type": "kimi_k2", "kv_lora_rank": None},
|
override_hf_configs={"model_type": "kimi_k2"},
|
||||||
) as remote_server:
|
) as remote_server:
|
||||||
yield remote_server
|
yield remote_server
|
||||||
|
|
||||||
|
|||||||
@@ -27,7 +27,6 @@ class ColPaliConfig(PaliGemmaConfig):
|
|||||||
embedding_dim: int | None = None,
|
embedding_dim: int | None = None,
|
||||||
embed_dim: int | None = None,
|
embed_dim: int | None = None,
|
||||||
dim: int | None = None,
|
dim: int | None = None,
|
||||||
projection_dim: int | None = None,
|
|
||||||
colbert_dim: int | None = None,
|
colbert_dim: int | None = None,
|
||||||
pooling: str | None = None,
|
pooling: str | None = None,
|
||||||
vlm_config: dict | None = None,
|
vlm_config: dict | None = None,
|
||||||
@@ -37,7 +36,6 @@ class ColPaliConfig(PaliGemmaConfig):
|
|||||||
self.embedding_dim = embedding_dim
|
self.embedding_dim = embedding_dim
|
||||||
self.embed_dim = embed_dim
|
self.embed_dim = embed_dim
|
||||||
self.dim = dim
|
self.dim = dim
|
||||||
self.projection_dim = projection_dim
|
|
||||||
self.colbert_dim = colbert_dim
|
self.colbert_dim = colbert_dim
|
||||||
self.pooling = pooling
|
self.pooling = pooling
|
||||||
|
|
||||||
|
|||||||
@@ -90,8 +90,6 @@ class MlpProjectorConfig(PretrainedConfig):
|
|||||||
class DeepseekVLV2Config(PretrainedConfig):
|
class DeepseekVLV2Config(PretrainedConfig):
|
||||||
model_type = "deepseek_vl_v2"
|
model_type = "deepseek_vl_v2"
|
||||||
architectures: list[str] | None = None
|
architectures: list[str] | None = None
|
||||||
vision_config: VisionEncoderConfig
|
|
||||||
projector_config: MlpProjectorConfig
|
|
||||||
|
|
||||||
tile_tag: str = "2D"
|
tile_tag: str = "2D"
|
||||||
global_view_pos: str = "head"
|
global_view_pos: str = "head"
|
||||||
|
|||||||
@@ -257,7 +257,6 @@ def _remap_mistral_audio_args(config: dict) -> dict:
|
|||||||
encoder_attention_heads=encoder_args["n_heads"],
|
encoder_attention_heads=encoder_args["n_heads"],
|
||||||
encoder_head_dim=encoder_args["head_dim"],
|
encoder_head_dim=encoder_args["head_dim"],
|
||||||
vocab_size=encoder_args["vocab_size"],
|
vocab_size=encoder_args["vocab_size"],
|
||||||
max_source_positions=encoder_args["max_source_positions"],
|
|
||||||
is_encoder_decoder=False, # Override WhisperConfig default
|
is_encoder_decoder=False, # Override WhisperConfig default
|
||||||
is_causal=encoder_args.get("causal", False),
|
is_causal=encoder_args.get("causal", False),
|
||||||
sliding_window=encoder_args.get("sliding_window", None),
|
sliding_window=encoder_args.get("sliding_window", None),
|
||||||
@@ -270,6 +269,10 @@ def _remap_mistral_audio_args(config: dict) -> dict:
|
|||||||
max_position_embeddings=block_pool_size * config["max_position_embeddings"],
|
max_position_embeddings=block_pool_size * config["max_position_embeddings"],
|
||||||
),
|
),
|
||||||
}
|
}
|
||||||
|
# Sometimes max_source_positions is explicitly set to None in params.json but this
|
||||||
|
# is not a valid value for WhisperConfig (or downstream code that uses it).
|
||||||
|
if (max_source_positions := encoder_args.get("max_source_positions")) is not None:
|
||||||
|
config["audio_config"].max_source_positions = max_source_positions
|
||||||
if quant_config:
|
if quant_config:
|
||||||
config["quantization_config"] = quant_config
|
config["quantization_config"] = quant_config
|
||||||
return config
|
return config
|
||||||
|
|||||||
@@ -6,11 +6,21 @@ from transformers import ParakeetEncoderConfig, PretrainedConfig
|
|||||||
|
|
||||||
|
|
||||||
class ParakeetConfig(ParakeetEncoderConfig):
|
class ParakeetConfig(ParakeetEncoderConfig):
|
||||||
llm_hidden_size: int
|
def __init__(
|
||||||
projection_hidden_size: int
|
self,
|
||||||
projection_bias: bool
|
llm_hidden_size: int,
|
||||||
projection_eps: float = 1e-5
|
projection_hidden_size: int,
|
||||||
sampling_rate: int
|
projection_bias: bool,
|
||||||
|
sampling_rate: int,
|
||||||
|
projection_eps: float = 1e-5,
|
||||||
|
**kwargs,
|
||||||
|
):
|
||||||
|
super().__init__(**kwargs)
|
||||||
|
self.llm_hidden_size = llm_hidden_size
|
||||||
|
self.projection_hidden_size = projection_hidden_size
|
||||||
|
self.projection_bias = projection_bias
|
||||||
|
self.sampling_rate = sampling_rate
|
||||||
|
self.projection_eps = projection_eps
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def from_hf_config(
|
def from_hf_config(
|
||||||
|
|||||||
@@ -408,7 +408,6 @@ class Qwen3ASRConfig(PretrainedConfig):
|
|||||||
support_languages=None,
|
support_languages=None,
|
||||||
**kwargs,
|
**kwargs,
|
||||||
):
|
):
|
||||||
super().__init__(**kwargs)
|
|
||||||
if thinker_config is None:
|
if thinker_config is None:
|
||||||
thinker_config = {}
|
thinker_config = {}
|
||||||
logger.info(
|
logger.info(
|
||||||
@@ -417,6 +416,7 @@ class Qwen3ASRConfig(PretrainedConfig):
|
|||||||
|
|
||||||
self.thinker_config = Qwen3ASRThinkerConfig(**thinker_config)
|
self.thinker_config = Qwen3ASRThinkerConfig(**thinker_config)
|
||||||
self.support_languages = support_languages
|
self.support_languages = support_languages
|
||||||
|
super().__init__(**kwargs)
|
||||||
|
|
||||||
def get_text_config(self, decoder=False) -> "PretrainedConfig":
|
def get_text_config(self, decoder=False) -> "PretrainedConfig":
|
||||||
"""
|
"""
|
||||||
|
|||||||
@@ -1,6 +1,7 @@
|
|||||||
# SPDX-License-Identifier: Apache-2.0
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||||
import os
|
import os
|
||||||
|
from dataclasses import fields, is_dataclass
|
||||||
from typing import Any
|
from typing import Any
|
||||||
|
|
||||||
from transformers import PretrainedConfig
|
from transformers import PretrainedConfig
|
||||||
@@ -15,11 +16,21 @@ class SpeculatorsConfig(PretrainedConfig):
|
|||||||
model_type = "speculators"
|
model_type = "speculators"
|
||||||
|
|
||||||
def __init__(self, **kwargs):
|
def __init__(self, **kwargs):
|
||||||
"""In Transformers v5, `PretrainedConfig` is decorated with `dataclass` and
|
# Transformers v4 - super().__init__ sets all kwargs as attributes
|
||||||
`huggingface_hub.dataclasses.strict(accept_kwargs=True)`.
|
if not is_dataclass(PretrainedConfig):
|
||||||
Inheriting classes do not inherit the `accept_kwargs=True` behaviour so we must
|
return super().__init__(**kwargs)
|
||||||
explicitly pass any kwargs to `PretrainedConfig.__init__`."""
|
# Transformers v5 - super().__init__ performs some validation before
|
||||||
super().__init__(**kwargs)
|
# setting all kwargs as attributes, so we set them first to be safe
|
||||||
|
pre_trained_config_fields = {f.name for f in fields(PretrainedConfig)}
|
||||||
|
super_kwargs = dict()
|
||||||
|
for key, value in kwargs.items():
|
||||||
|
if key == "model_type":
|
||||||
|
continue # model_type is set as a class variable, so skip it here
|
||||||
|
elif key in pre_trained_config_fields:
|
||||||
|
super_kwargs[key] = value
|
||||||
|
else:
|
||||||
|
setattr(self, key, value)
|
||||||
|
super().__init__(**super_kwargs)
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def from_pretrained(
|
def from_pretrained(
|
||||||
|
|||||||
@@ -228,7 +228,7 @@ class ModelArchConfigConvertorBase:
|
|||||||
"pangu_ultra_moe_mtp",
|
"pangu_ultra_moe_mtp",
|
||||||
"bailing_hybrid",
|
"bailing_hybrid",
|
||||||
):
|
):
|
||||||
return self.hf_text_config.kv_lora_rank is not None
|
return getattr(self.hf_text_config, "kv_lora_rank", None) is not None
|
||||||
elif self.hf_text_config.model_type == "eagle":
|
elif self.hf_text_config.model_type == "eagle":
|
||||||
# if the model is an EAGLE module, check for the
|
# if the model is an EAGLE module, check for the
|
||||||
# underlying architecture
|
# underlying architecture
|
||||||
@@ -241,7 +241,7 @@ class ModelArchConfigConvertorBase:
|
|||||||
"deepseek_v32",
|
"deepseek_v32",
|
||||||
"deepseek_mtp",
|
"deepseek_mtp",
|
||||||
)
|
)
|
||||||
and self.hf_text_config.kv_lora_rank is not None
|
and getattr(self.hf_text_config, "kv_lora_rank", None) is not None
|
||||||
)
|
)
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user