From 2f186635cbcb38fd85e718a5b7ff9ec698cbb4f8 Mon Sep 17 00:00:00 2001 From: Roger Wang Date: Sat, 14 Feb 2026 03:56:11 -0800 Subject: [PATCH] [Bugfix] Fix Qwen3.5 config loading (#34554) Signed-off-by: Roger Wang --- vllm/transformers_utils/configs/qwen3_5.py | 14 +++++++++----- vllm/transformers_utils/configs/qwen3_5_moe.py | 14 +++++++++----- 2 files changed, 18 insertions(+), 10 deletions(-) diff --git a/vllm/transformers_utils/configs/qwen3_5.py b/vllm/transformers_utils/configs/qwen3_5.py index 22c1d9d98..9d43986a6 100644 --- a/vllm/transformers_utils/configs/qwen3_5.py +++ b/vllm/transformers_utils/configs/qwen3_5.py @@ -72,10 +72,6 @@ class Qwen3_5TextConfig(PretrainedConfig): "mrope_section", "mrope_interleaved", ] - self.pad_token_id = pad_token_id - self.bos_token_id = bos_token_id - self.eos_token_id = eos_token_id - self.tie_word_embeddings = tie_word_embeddings self.vocab_size = vocab_size self.max_position_embeddings = max_position_embeddings self.hidden_size = hidden_size @@ -111,6 +107,13 @@ class Qwen3_5TextConfig(PretrainedConfig): self.linear_num_key_heads = linear_num_key_heads self.linear_num_value_heads = linear_num_value_heads super().__init__(**kwargs) + # Set these AFTER super().__init__() because transformers v4's + # PretrainedConfig.__init__ has these as explicit params with different + # defaults (e.g. tie_word_embeddings=True) that would overwrite our values. + self.pad_token_id = pad_token_id + self.bos_token_id = bos_token_id + self.eos_token_id = eos_token_id + self.tie_word_embeddings = tie_word_embeddings class Qwen3_5VisionConfig(PretrainedConfig): @@ -182,8 +185,9 @@ class Qwen3_5Config(PretrainedConfig): self.video_token_id = video_token_id self.vision_start_token_id = vision_start_token_id self.vision_end_token_id = vision_end_token_id - self.tie_word_embeddings = tie_word_embeddings super().__init__(**kwargs) + # Set after super().__init__() to avoid v4 PretrainedConfig overwrite + self.tie_word_embeddings = tie_word_embeddings __all__ = ["Qwen3_5Config", "Qwen3_5TextConfig"] diff --git a/vllm/transformers_utils/configs/qwen3_5_moe.py b/vllm/transformers_utils/configs/qwen3_5_moe.py index 701527c91..41a1f7ed9 100644 --- a/vllm/transformers_utils/configs/qwen3_5_moe.py +++ b/vllm/transformers_utils/configs/qwen3_5_moe.py @@ -79,10 +79,6 @@ class Qwen3_5MoeTextConfig(PretrainedConfig): "mrope_section", "mrope_interleaved", ] - self.pad_token_id = pad_token_id - self.bos_token_id = bos_token_id - self.eos_token_id = eos_token_id - self.tie_word_embeddings = tie_word_embeddings self.vocab_size = vocab_size self.max_position_embeddings = max_position_embeddings self.hidden_size = hidden_size @@ -123,6 +119,13 @@ class Qwen3_5MoeTextConfig(PretrainedConfig): self.output_router_logits = output_router_logits self.router_aux_loss_coef = router_aux_loss_coef super().__init__(**kwargs) + # Set these AFTER super().__init__() because transformers v4's + # PretrainedConfig.__init__ has these as explicit params with different + # defaults (e.g. tie_word_embeddings=True) that would overwrite our values. + self.pad_token_id = pad_token_id + self.bos_token_id = bos_token_id + self.eos_token_id = eos_token_id + self.tie_word_embeddings = tie_word_embeddings class Qwen3_5MoeVisionConfig(PretrainedConfig): @@ -194,8 +197,9 @@ class Qwen3_5MoeConfig(PretrainedConfig): self.video_token_id = video_token_id self.vision_start_token_id = vision_start_token_id self.vision_end_token_id = vision_end_token_id - self.tie_word_embeddings = tie_word_embeddings super().__init__(**kwargs) + # Set after super().__init__() to avoid v4 PretrainedConfig overwrite + self.tie_word_embeddings = tie_word_embeddings __all__ = ["Qwen3_5MoeConfig", "Qwen3_5MoeTextConfig"]