Fix models which use layer_type_validation for Transformers v5 (#37398)

Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
This commit is contained in:
Harry Mellor
2026-03-18 18:41:51 +00:00
committed by GitHub
parent 738d0a281f
commit 5ce2d10e4a
4 changed files with 45 additions and 16 deletions

View File

@@ -2,7 +2,7 @@
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from transformers.configuration_utils import PretrainedConfig, layer_type_validation
from transformers.configuration_utils import PretrainedConfig
class OlmoHybridConfig(PretrainedConfig):
@@ -228,7 +228,15 @@ class OlmoHybridConfig(PretrainedConfig):
if "full_attention" not in layer_types:
layer_types[-1] = "full_attention"
layer_type_validation(layer_types, num_hidden_layers)
if hasattr(self, "validate_layer_type"):
# Transformers v5
self.layer_types = layer_types
self.validate_layer_type()
else:
# Transformers v4
from transformers.configuration_utils import layer_type_validation
layer_type_validation(layer_types, num_hidden_layers)
if "linear_attention" not in layer_types:
raise ValueError(
"OLMoHybrid expects at least one 'linear_attention' layer."

View File

@@ -16,7 +16,7 @@
# limitations under the License.
"""Qwen3.5 model configuration"""
from transformers.configuration_utils import PretrainedConfig, layer_type_validation
from transformers.configuration_utils import PretrainedConfig
class Qwen3_5TextConfig(PretrainedConfig):
@@ -68,10 +68,6 @@ class Qwen3_5TextConfig(PretrainedConfig):
eos_token_id=None,
**kwargs,
):
kwargs["ignore_keys_at_rope_validation"] = [
"mrope_section",
"mrope_interleaved",
]
self.vocab_size = vocab_size
self.max_position_embeddings = max_position_embeddings
self.hidden_size = hidden_size
@@ -98,7 +94,18 @@ class Qwen3_5TextConfig(PretrainedConfig):
else "full_attention"
for i in range(self.num_hidden_layers)
]
layer_type_validation(self.layer_types, self.num_hidden_layers)
if hasattr(self, "validate_layer_type"):
# Transformers v5
kwargs["ignore_keys_at_rope_validation"] = {
"mrope_section",
"mrope_interleaved",
}
self.validate_layer_type()
else:
# Transformers v4
from transformers.configuration_utils import layer_type_validation
layer_type_validation(self.layer_types, self.num_hidden_layers)
# linear attention part
self.linear_conv_kernel_dim = linear_conv_kernel_dim

View File

@@ -16,7 +16,7 @@
# limitations under the License.
"""Qwen3.5-MoE model configuration"""
from transformers.configuration_utils import PretrainedConfig, layer_type_validation
from transformers.configuration_utils import PretrainedConfig
class Qwen3_5MoeTextConfig(PretrainedConfig):
@@ -75,10 +75,6 @@ class Qwen3_5MoeTextConfig(PretrainedConfig):
eos_token_id=None,
**kwargs,
):
kwargs["ignore_keys_at_rope_validation"] = [
"mrope_section",
"mrope_interleaved",
]
self.vocab_size = vocab_size
self.max_position_embeddings = max_position_embeddings
self.hidden_size = hidden_size
@@ -104,7 +100,18 @@ class Qwen3_5MoeTextConfig(PretrainedConfig):
else "full_attention"
for i in range(self.num_hidden_layers)
]
layer_type_validation(self.layer_types, self.num_hidden_layers)
if hasattr(self, "validate_layer_type"):
# Transformers v5
kwargs["ignore_keys_at_rope_validation"] = {
"mrope_section",
"mrope_interleaved",
}
self.validate_layer_type()
else:
# Transformers v4
from transformers.configuration_utils import layer_type_validation
layer_type_validation(self.layer_types, self.num_hidden_layers)
# linear attention part
self.linear_conv_kernel_dim = linear_conv_kernel_dim

View File

@@ -16,7 +16,7 @@
# limitations under the License.
"""Qwen3-Next model configuration"""
from transformers.configuration_utils import PretrainedConfig, layer_type_validation
from transformers.configuration_utils import PretrainedConfig
from transformers.utils import logging
logger = logging.get_logger(__name__)
@@ -253,7 +253,14 @@ class Qwen3NextConfig(PretrainedConfig):
"linear_attention" if bool((i + 1) % 4) else "full_attention"
for i in range(self.num_hidden_layers)
]
layer_type_validation(self.layer_types)
if hasattr(self, "validate_layer_type"):
# Transformers v5
self.validate_layer_type()
else:
# Transformers v4
from transformers.configuration_utils import layer_type_validation
layer_type_validation(self.layer_types)
# linear attention part
self.linear_conv_kernel_dim = linear_conv_kernel_dim