diff --git a/tests/entrypoints/openai/chat_completion/test_completion_with_function_calling.py b/tests/entrypoints/openai/chat_completion/test_completion_with_function_calling.py index 704598a57..965b21351 100644 --- a/tests/entrypoints/openai/chat_completion/test_completion_with_function_calling.py +++ b/tests/entrypoints/openai/chat_completion/test_completion_with_function_calling.py @@ -231,13 +231,14 @@ def k2_server(): "--gpu-memory-utilization", "0.4", ] + ROCM_EXTRA_ARGS - # hack to test kimi_k2 tool use tool_id format. - # avoid error in is_deepseek_mla check by setting kv_lora_rank=null + # Test kimi_k2 tool use tool_id format by overriding model_type. + # is_deepseek_mla safely returns False via getattr when kv_lora_rank + # is absent from the underlying config. with RemoteOpenAIServer( MODEL_NAME, args, env_dict=ROCM_ENV_OVERRIDES, - override_hf_configs={"model_type": "kimi_k2", "kv_lora_rank": None}, + override_hf_configs={"model_type": "kimi_k2"}, ) as remote_server: yield remote_server diff --git a/vllm/transformers_utils/configs/colpali.py b/vllm/transformers_utils/configs/colpali.py index f64aa7564..c40c58b25 100644 --- a/vllm/transformers_utils/configs/colpali.py +++ b/vllm/transformers_utils/configs/colpali.py @@ -27,7 +27,6 @@ class ColPaliConfig(PaliGemmaConfig): embedding_dim: int | None = None, embed_dim: int | None = None, dim: int | None = None, - projection_dim: int | None = None, colbert_dim: int | None = None, pooling: str | None = None, vlm_config: dict | None = None, @@ -37,7 +36,6 @@ class ColPaliConfig(PaliGemmaConfig): self.embedding_dim = embedding_dim self.embed_dim = embed_dim self.dim = dim - self.projection_dim = projection_dim self.colbert_dim = colbert_dim self.pooling = pooling diff --git a/vllm/transformers_utils/configs/deepseek_vl2.py b/vllm/transformers_utils/configs/deepseek_vl2.py index 822e8cdd0..80fedd101 100644 --- a/vllm/transformers_utils/configs/deepseek_vl2.py +++ b/vllm/transformers_utils/configs/deepseek_vl2.py @@ -90,8 +90,6 @@ class MlpProjectorConfig(PretrainedConfig): class DeepseekVLV2Config(PretrainedConfig): model_type = "deepseek_vl_v2" architectures: list[str] | None = None - vision_config: VisionEncoderConfig - projector_config: MlpProjectorConfig tile_tag: str = "2D" global_view_pos: str = "head" diff --git a/vllm/transformers_utils/configs/mistral.py b/vllm/transformers_utils/configs/mistral.py index 90728bbff..bdeadec1b 100644 --- a/vllm/transformers_utils/configs/mistral.py +++ b/vllm/transformers_utils/configs/mistral.py @@ -257,7 +257,6 @@ def _remap_mistral_audio_args(config: dict) -> dict: encoder_attention_heads=encoder_args["n_heads"], encoder_head_dim=encoder_args["head_dim"], vocab_size=encoder_args["vocab_size"], - max_source_positions=encoder_args["max_source_positions"], is_encoder_decoder=False, # Override WhisperConfig default is_causal=encoder_args.get("causal", False), sliding_window=encoder_args.get("sliding_window", None), @@ -270,6 +269,10 @@ def _remap_mistral_audio_args(config: dict) -> dict: max_position_embeddings=block_pool_size * config["max_position_embeddings"], ), } + # Sometimes max_source_positions is explicitly set to None in params.json but this + # is not a valid value for WhisperConfig (or downstream code that uses it). + if (max_source_positions := encoder_args.get("max_source_positions")) is not None: + config["audio_config"].max_source_positions = max_source_positions if quant_config: config["quantization_config"] = quant_config return config diff --git a/vllm/transformers_utils/configs/parakeet.py b/vllm/transformers_utils/configs/parakeet.py index efd4c4664..7c7a5ddd8 100644 --- a/vllm/transformers_utils/configs/parakeet.py +++ b/vllm/transformers_utils/configs/parakeet.py @@ -6,11 +6,21 @@ from transformers import ParakeetEncoderConfig, PretrainedConfig class ParakeetConfig(ParakeetEncoderConfig): - llm_hidden_size: int - projection_hidden_size: int - projection_bias: bool - projection_eps: float = 1e-5 - sampling_rate: int + def __init__( + self, + llm_hidden_size: int, + projection_hidden_size: int, + projection_bias: bool, + sampling_rate: int, + projection_eps: float = 1e-5, + **kwargs, + ): + super().__init__(**kwargs) + self.llm_hidden_size = llm_hidden_size + self.projection_hidden_size = projection_hidden_size + self.projection_bias = projection_bias + self.sampling_rate = sampling_rate + self.projection_eps = projection_eps @staticmethod def from_hf_config( diff --git a/vllm/transformers_utils/configs/qwen3_asr.py b/vllm/transformers_utils/configs/qwen3_asr.py index 28fa96e72..a08b2b7de 100644 --- a/vllm/transformers_utils/configs/qwen3_asr.py +++ b/vllm/transformers_utils/configs/qwen3_asr.py @@ -408,7 +408,6 @@ class Qwen3ASRConfig(PretrainedConfig): support_languages=None, **kwargs, ): - super().__init__(**kwargs) if thinker_config is None: thinker_config = {} logger.info( @@ -417,6 +416,7 @@ class Qwen3ASRConfig(PretrainedConfig): self.thinker_config = Qwen3ASRThinkerConfig(**thinker_config) self.support_languages = support_languages + super().__init__(**kwargs) def get_text_config(self, decoder=False) -> "PretrainedConfig": """ diff --git a/vllm/transformers_utils/configs/speculators/base.py b/vllm/transformers_utils/configs/speculators/base.py index 2a39e2f16..697c9d52e 100644 --- a/vllm/transformers_utils/configs/speculators/base.py +++ b/vllm/transformers_utils/configs/speculators/base.py @@ -1,6 +1,7 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project import os +from dataclasses import fields, is_dataclass from typing import Any from transformers import PretrainedConfig @@ -15,11 +16,21 @@ class SpeculatorsConfig(PretrainedConfig): model_type = "speculators" def __init__(self, **kwargs): - """In Transformers v5, `PretrainedConfig` is decorated with `dataclass` and - `huggingface_hub.dataclasses.strict(accept_kwargs=True)`. - Inheriting classes do not inherit the `accept_kwargs=True` behaviour so we must - explicitly pass any kwargs to `PretrainedConfig.__init__`.""" - super().__init__(**kwargs) + # Transformers v4 - super().__init__ which sets all kwargs as attributes + if not is_dataclass(PretrainedConfig): + return super().__init__(**kwargs) + # Transformers v5 - super().__init__ performs some validation before + # setting all kwargs as attributes, so we set them first to be safe + pre_trained_config_fields = {f.name for f in fields(PretrainedConfig)} + super_kwargs = dict() + for key, value in kwargs.items(): + if key == "model_type": + continue # model_type is set as a class variable, so skip it here + elif key in pre_trained_config_fields: + super_kwargs[key] = value + else: + setattr(self, key, value) + super().__init__(**super_kwargs) @classmethod def from_pretrained( diff --git a/vllm/transformers_utils/model_arch_config_convertor.py b/vllm/transformers_utils/model_arch_config_convertor.py index 26fc04042..f5fb290d1 100644 --- a/vllm/transformers_utils/model_arch_config_convertor.py +++ b/vllm/transformers_utils/model_arch_config_convertor.py @@ -228,7 +228,7 @@ class ModelArchConfigConvertorBase: "pangu_ultra_moe_mtp", "bailing_hybrid", ): - return self.hf_text_config.kv_lora_rank is not None + return getattr(self.hf_text_config, "kv_lora_rank", None) is not None elif self.hf_text_config.model_type == "eagle": # if the model is an EAGLE module, check for the # underlying architecture @@ -241,7 +241,7 @@ class ModelArchConfigConvertorBase: "deepseek_v32", "deepseek_mtp", ) - and self.hf_text_config.kv_lora_rank is not None + and getattr(self.hf_text_config, "kv_lora_rank", None) is not None ) return False