diff --git a/tests/models/registry.py b/tests/models/registry.py
index 24d96cfda..a62937ef6 100644
--- a/tests/models/registry.py
+++ b/tests/models/registry.py
@@ -256,7 +256,7 @@ _TEXT_GENERATION_EXAMPLE_MODELS = {
     ),
     "Exaone4ForCausalLM": _HfExamplesInfo("LGAI-EXAONE/EXAONE-4.0-32B"),
     "ExaoneMoEForCausalLM": _HfExamplesInfo(
-        "LGAI-EXAONE/K-EXAONE-236B-A23B", min_transformers_version="5.0.0"
+        "LGAI-EXAONE/K-EXAONE-236B-A23B", min_transformers_version="5.1.0"
     ),
     "Fairseq2LlamaForCausalLM": _HfExamplesInfo("mgleize/fairseq2-dummy-Llama-3.2-1B"),
     "FalconForCausalLM": _HfExamplesInfo("tiiuae/falcon-7b"),
@@ -273,7 +273,7 @@ _TEXT_GENERATION_EXAMPLE_MODELS = {
     "Glm4MoeForCausalLM": _HfExamplesInfo("zai-org/GLM-4.5"),
     "Glm4MoeLiteForCausalLM": _HfExamplesInfo(
         "zai-org/GLM-4.7-Flash",
-        min_transformers_version="5.0.0.dev",
+        min_transformers_version="5.0.0",
     ),
     "GPT2LMHeadModel": _HfExamplesInfo("openai-community/gpt2", {"alias": "gpt2"}),
     "GPTBigCodeForCausalLM": _HfExamplesInfo(
@@ -650,7 +650,7 @@ _MULTIMODAL_EXAMPLE_MODELS = {
     # [Decoder-only]
     "AriaForConditionalGeneration": _HfExamplesInfo("rhymes-ai/Aria"),
     "AudioFlamingo3ForConditionalGeneration": _HfExamplesInfo(
-        "nvidia/audio-flamingo-3-hf", min_transformers_version="5.0.0.dev"
+        "nvidia/audio-flamingo-3-hf", min_transformers_version="5.0.0"
     ),
     "AyaVisionForConditionalGeneration": _HfExamplesInfo("CohereLabs/aya-vision-8b"),
     "BagelForConditionalGeneration": _HfExamplesInfo("ByteDance-Seed/BAGEL-7B-MoT"),
@@ -693,7 +693,7 @@ _MULTIMODAL_EXAMPLE_MODELS = {
     "GlmAsrForConditionalGeneration": _HfExamplesInfo(
         "zai-org/GLM-ASR-Nano-2512",
         trust_remote_code=True,
-        min_transformers_version="5.0",
+        min_transformers_version="5.0.0",
     ),
     "GraniteVision": _HfExamplesInfo("ibm-granite/granite-vision-3.3-2b"),
     "GraniteSpeechForConditionalGeneration": _HfExamplesInfo(
@@ -709,7 +709,7 @@ _MULTIMODAL_EXAMPLE_MODELS = {
     "GlmOcrForConditionalGeneration": _HfExamplesInfo(
         "zai-org/GLM-OCR",
         is_available_online=False,
-        min_transformers_version="5.0.0.dev",
+        min_transformers_version="5.1.0",
     ),
     "H2OVLChatModel": _HfExamplesInfo(
         "h2oai/h2ovl-mississippi-800m",
@@ -1048,7 +1048,7 @@ _SPECULATIVE_DECODING_EXAMPLE_MODELS = {
     "ExaoneMoeMTP": _HfExamplesInfo(
         "LGAI-EXAONE/K-EXAONE-236B-A23B",
         speculative_model="LGAI-EXAONE/K-EXAONE-236B-A23B",
-        min_transformers_version="5.0.0",
+        min_transformers_version="5.1.0",
     ),
     "Glm4MoeMTPModel": _HfExamplesInfo(
         "zai-org/GLM-4.5",
@@ -1057,13 +1057,13 @@ _SPECULATIVE_DECODING_EXAMPLE_MODELS = {
     "Glm4MoeLiteMTPModel": _HfExamplesInfo(
         "zai-org/GLM-4.7-Flash",
         speculative_model="zai-org/GLM-4.7-Flash",
-        min_transformers_version="5.0.0.dev",
+        min_transformers_version="5.0.0",
     ),
     "GlmOcrMTPModel": _HfExamplesInfo(
         "zai-org/GLM-OCR",
         speculative_model="zai-org/GLM-OCR",
         is_available_online=False,
-        min_transformers_version="5.0.0.dev",
+        min_transformers_version="5.1.0",
     ),
     "LongCatFlashMTPModel": _HfExamplesInfo(
         "meituan-longcat/LongCat-Flash-Chat",
@@ -1090,27 +1090,27 @@ _SPECULATIVE_DECODING_EXAMPLE_MODELS = {

 _TRANSFORMERS_BACKEND_MODELS = {
     "TransformersEmbeddingModel": _HfExamplesInfo(
-        "BAAI/bge-base-en-v1.5", min_transformers_version="5.0.0.dev"
+        "BAAI/bge-base-en-v1.5", min_transformers_version="5.0.0"
     ),
     "TransformersForSequenceClassification": _HfExamplesInfo(
         "papluca/xlm-roberta-base-language-detection",
-        min_transformers_version="5.0.0.dev",
+        min_transformers_version="5.0.0",
     ),
     "TransformersForCausalLM": _HfExamplesInfo(
         "hmellor/Ilama-3.2-1B", trust_remote_code=True
     ),
     "TransformersMultiModalForCausalLM": _HfExamplesInfo("BAAI/Emu3-Chat-hf"),
     "TransformersMoEForCausalLM": _HfExamplesInfo(
-        "allenai/OLMoE-1B-7B-0924", min_transformers_version="5.0.0.dev"
+        "allenai/OLMoE-1B-7B-0924", min_transformers_version="5.0.0"
     ),
     "TransformersMultiModalMoEForCausalLM": _HfExamplesInfo(
-        "Qwen/Qwen3-VL-30B-A3B-Instruct", min_transformers_version="5.0.0.dev"
+        "Qwen/Qwen3-VL-30B-A3B-Instruct", min_transformers_version="5.0.0"
    ),
     "TransformersMoEEmbeddingModel": _HfExamplesInfo(
-        "Qwen/Qwen3-30B-A3B", min_transformers_version="5.0.0.dev"
+        "Qwen/Qwen3-30B-A3B", min_transformers_version="5.0.0"
     ),
     "TransformersMoEForSequenceClassification": _HfExamplesInfo(
-        "Qwen/Qwen3-30B-A3B", min_transformers_version="5.0.0.dev"
+        "Qwen/Qwen3-30B-A3B", min_transformers_version="5.0.0"
     ),
     "TransformersMultiModalEmbeddingModel": _HfExamplesInfo("google/gemma-3-4b-it"),
     "TransformersMultiModalForSequenceClassification": _HfExamplesInfo(
diff --git a/tests/models/test_transformers.py b/tests/models/test_transformers.py
index c642ff1ee..15ebb5f4a 100644
--- a/tests/models/test_transformers.py
+++ b/tests/models/test_transformers.py
@@ -78,7 +78,7 @@ def test_models(
     from packaging.version import Version

     installed = Version(transformers.__version__)
-    required = Version("5.0.0.dev")
+    required = Version("5.0.0")
     if model == "allenai/OLMoE-1B-7B-0924" and installed < required:
         pytest.skip(
             "MoE models with the Transformers modeling backend require "
diff --git a/tests/v1/e2e/test_spec_decode.py b/tests/v1/e2e/test_spec_decode.py
index 51725a306..02e152914 100644
--- a/tests/v1/e2e/test_spec_decode.py
+++ b/tests/v1/e2e/test_spec_decode.py
@@ -455,7 +455,7 @@ def test_eagle_correctness(
     from packaging.version import Version

     installed = Version(transformers.__version__)
-    required = Version("5.0.0.dev")
+    required = Version("5.0.0")
     if installed < required:
         pytest.skip(
             "Eagle3 with the Transformers modeling backend requires "
diff --git a/vllm/model_executor/models/transformers/base.py b/vllm/model_executor/models/transformers/base.py
index d094bb289..d3f248f8b 100644
--- a/vllm/model_executor/models/transformers/base.py
+++ b/vllm/model_executor/models/transformers/base.py
@@ -350,7 +350,7 @@ class Base(
         # vLLM does not support encoder-decoder models, so if any encoder layer is
         # found in a text only model, we assume the whole model is an encoder model
         if has_encoder(self.model) and not is_multimodal(self.config):
-            self.check_version("5.0.0.dev0", "encoder models support")
+            self.check_version("5.0.0", "encoder models support")
             attn_type = AttentionType.ENCODER_ONLY
         else:
             attn_type = AttentionType.DECODER
@@ -502,7 +502,7 @@
         )

     def set_aux_hidden_state_layers(self, layers: tuple[int, ...]) -> None:
-        self.check_version("5.0.0.dev0", "Eagle3 support")
+        self.check_version("5.0.0", "Eagle3 support")
         from transformers.utils.generic import OutputRecorder

         # The default value in PreTrainedModel is None
diff --git a/vllm/model_executor/models/transformers/moe.py b/vllm/model_executor/models/transformers/moe.py
index 2fa23f96f..c636da211 100644
--- a/vllm/model_executor/models/transformers/moe.py
+++ b/vllm/model_executor/models/transformers/moe.py
@@ -118,7 +118,7 @@ direct_register_custom_op(

 class MoEMixin(MixtureOfExperts):
     def __init__(self, *, vllm_config: "VllmConfig", prefix: str = ""):
-        self.check_version("5.0.0.dev0", "MoE models support")
+        self.check_version("5.0.0", "MoE models support")
         # Skip MixtureOfExperts.__init__ and call the next class in MRO
         super(MixtureOfExperts, self).__init__(vllm_config=vllm_config, prefix=prefix)

diff --git a/vllm/transformers_utils/config.py b/vllm/transformers_utils/config.py
index a009017e5..fd44be481 100644
--- a/vllm/transformers_utils/config.py
+++ b/vllm/transformers_utils/config.py
@@ -328,7 +328,7 @@ def patch_rope_parameters(config: PretrainedConfig) -> None:
     partial_rotary_factor = getattr_iter(config, names, None, warn=True)
     ompe = getattr(config, "original_max_position_embeddings", None)

-    if Version(version("transformers")) < Version("5.0.0.dev0"):
+    if Version(version("transformers")) < Version("5.0.0"):
         # Transformers v4 installed, legacy config fields may be present
         if (rope_scaling := getattr(config, "rope_scaling", None)) is not None:
             config.rope_parameters = rope_scaling
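Note on the version strings: packaging (PEP 440) orders pre-releases before the final release, so Version("5.0.0.dev0") < Version("5.0.0") is True. Tightening "5.0.0.dev" to "5.0.0" therefore stops development builds of transformers from satisfying these gates, while the bumps to "5.1.0" defer the affected models to the next minor release. A minimal sketch of the skip pattern these tests share, assuming only transformers, packaging, and pytest; the helper name _require_min_transformers is illustrative, not part of this diff:

    import pytest
    import transformers
    from packaging.version import Version

    def _require_min_transformers(minimum: str, feature: str) -> None:
        """Skip the calling test unless the installed transformers is new enough."""
        # Hypothetical helper; vLLM's real gates are the pytest.skip blocks
        # and Base.check_version calls shown in the diff above.
        installed = Version(transformers.__version__)
        if installed < Version(minimum):
            pytest.skip(f"{feature} requires transformers>={minimum}, found {installed}")

    # Usage mirroring the gate in tests/v1/e2e/test_spec_decode.py:
    # _require_min_transformers("5.0.0", "Eagle3 with the Transformers modeling backend")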