[CI/Build] Bump transformers version (#27528)
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk> Signed-off-by: Isotr0py <mozf@mail2.sysu.edu.cn> Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> Co-authored-by: Isotr0py <mozf@mail2.sysu.edu.cn> Co-authored-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
This commit is contained in:
@@ -7,7 +7,7 @@ requests >= 2.26.0
|
|||||||
tqdm
|
tqdm
|
||||||
blake3
|
blake3
|
||||||
py-cpuinfo
|
py-cpuinfo
|
||||||
transformers >= 4.56.0
|
transformers >= 4.56.0, < 5
|
||||||
tokenizers >= 0.21.1 # Required for fast incremental detokenization.
|
tokenizers >= 0.21.1 # Required for fast incremental detokenization.
|
||||||
protobuf # Required by LlamaTokenizer.
|
protobuf # Required by LlamaTokenizer.
|
||||||
fastapi[standard] >= 0.115.0 # Required by FastAPI's form models in the OpenAI API server's audio transcriptions endpoint.
|
fastapi[standard] >= 0.115.0 # Required by FastAPI's form models in the OpenAI API server's audio transcriptions endpoint.
|
||||||
|
|||||||
@@ -29,7 +29,7 @@ opencv-python-headless >= 4.11.0 # required for video test
|
|||||||
datamodel_code_generator # required for minicpm3 test
|
datamodel_code_generator # required for minicpm3 test
|
||||||
lm-eval[api] @ git+https://github.com/EleutherAI/lm-evaluation-harness.git@206b7722158f58c35b7ffcd53b035fdbdda5126d # required for model evaluation test
|
lm-eval[api] @ git+https://github.com/EleutherAI/lm-evaluation-harness.git@206b7722158f58c35b7ffcd53b035fdbdda5126d # required for model evaluation test
|
||||||
mteb>=1.38.11, <2 # required for mteb test
|
mteb>=1.38.11, <2 # required for mteb test
|
||||||
transformers==4.56.2
|
transformers==4.57.1
|
||||||
tokenizers==0.22.0
|
tokenizers==0.22.0
|
||||||
schemathesis>=3.39.15 # Required for openai schema test.
|
schemathesis>=3.39.15 # Required for openai schema test.
|
||||||
# quantization
|
# quantization
|
||||||
|
|||||||
@@ -37,7 +37,7 @@ datamodel_code_generator # required for minicpm3 test
|
|||||||
# TODO: Use lm-eval[api]==0.4.10 once released
|
# TODO: Use lm-eval[api]==0.4.10 once released
|
||||||
lm-eval[api] @ git+https://github.com/EleutherAI/lm-evaluation-harness.git@206b7722158f58c35b7ffcd53b035fdbdda5126d # required for model evaluation test
|
lm-eval[api] @ git+https://github.com/EleutherAI/lm-evaluation-harness.git@206b7722158f58c35b7ffcd53b035fdbdda5126d # required for model evaluation test
|
||||||
mteb[bm25s]>=1.38.11, <2 # required for mteb test
|
mteb[bm25s]>=1.38.11, <2 # required for mteb test
|
||||||
transformers==4.56.2
|
transformers==4.57.1
|
||||||
tokenizers==0.22.0
|
tokenizers==0.22.0
|
||||||
schemathesis>=3.39.15 # Required for openai schema test.
|
schemathesis>=3.39.15 # Required for openai schema test.
|
||||||
# quantization
|
# quantization
|
||||||
|
|||||||
@@ -1196,7 +1196,7 @@ tqdm==4.66.6
|
|||||||
# transformers
|
# transformers
|
||||||
tqdm-multiprocess==0.0.11
|
tqdm-multiprocess==0.0.11
|
||||||
# via lm-eval
|
# via lm-eval
|
||||||
transformers==4.56.2
|
transformers==4.57.1
|
||||||
# via
|
# via
|
||||||
# -r requirements/test.in
|
# -r requirements/test.in
|
||||||
# genai-perf
|
# genai-perf
|
||||||
|
|||||||
@@ -186,6 +186,8 @@ def create_reduced_config(
|
|||||||
if "text_config" in config_dict:
|
if "text_config" in config_dict:
|
||||||
original_text_layers = config_dict["text_config"]["num_hidden_layers"]
|
original_text_layers = config_dict["text_config"]["num_hidden_layers"]
|
||||||
config_dict["text_config"]["num_hidden_layers"] = text_layers
|
config_dict["text_config"]["num_hidden_layers"] = text_layers
|
||||||
|
original_layer_types = config_dict["text_config"]["layer_types"]
|
||||||
|
config_dict["text_config"]["layer_types"] = original_layer_types[:text_layers]
|
||||||
print(f"Reduced text layers from {original_text_layers} to {text_layers}")
|
print(f"Reduced text layers from {original_text_layers} to {text_layers}")
|
||||||
|
|
||||||
original_num_experts = config_dict["text_config"]["num_local_experts"]
|
original_num_experts = config_dict["text_config"]["num_local_experts"]
|
||||||
|
|||||||
@@ -882,27 +882,27 @@ _SPECULATIVE_DECODING_EXAMPLE_MODELS = {
|
|||||||
|
|
||||||
_TRANSFORMERS_BACKEND_MODELS = {
|
_TRANSFORMERS_BACKEND_MODELS = {
|
||||||
"TransformersEmbeddingModel": _HfExamplesInfo(
|
"TransformersEmbeddingModel": _HfExamplesInfo(
|
||||||
"BAAI/bge-base-en-v1.5", min_transformers_version="4.57.0.dev0"
|
"BAAI/bge-base-en-v1.5", min_transformers_version="5.0.0"
|
||||||
),
|
),
|
||||||
"TransformersForSequenceClassification": _HfExamplesInfo(
|
"TransformersForSequenceClassification": _HfExamplesInfo(
|
||||||
"papluca/xlm-roberta-base-language-detection",
|
"papluca/xlm-roberta-base-language-detection",
|
||||||
min_transformers_version="4.57.0.dev0",
|
min_transformers_version="5.0.0",
|
||||||
),
|
),
|
||||||
"TransformersForCausalLM": _HfExamplesInfo(
|
"TransformersForCausalLM": _HfExamplesInfo(
|
||||||
"hmellor/Ilama-3.2-1B", trust_remote_code=True
|
"hmellor/Ilama-3.2-1B", trust_remote_code=True
|
||||||
),
|
),
|
||||||
"TransformersMultiModalForCausalLM": _HfExamplesInfo("BAAI/Emu3-Chat-hf"),
|
"TransformersMultiModalForCausalLM": _HfExamplesInfo("BAAI/Emu3-Chat-hf"),
|
||||||
"TransformersMoEForCausalLM": _HfExamplesInfo(
|
"TransformersMoEForCausalLM": _HfExamplesInfo(
|
||||||
"allenai/OLMoE-1B-7B-0924", min_transformers_version="4.57.0.dev0"
|
"allenai/OLMoE-1B-7B-0924", min_transformers_version="5.0.0"
|
||||||
),
|
),
|
||||||
"TransformersMultiModalMoEForCausalLM": _HfExamplesInfo(
|
"TransformersMultiModalMoEForCausalLM": _HfExamplesInfo(
|
||||||
"Qwen/Qwen3-VL-30B-A3B-Instruct", min_transformers_version="4.57.0.dev0"
|
"Qwen/Qwen3-VL-30B-A3B-Instruct", min_transformers_version="5.0.0"
|
||||||
),
|
),
|
||||||
"TransformersMoEEmbeddingModel": _HfExamplesInfo(
|
"TransformersMoEEmbeddingModel": _HfExamplesInfo(
|
||||||
"Qwen/Qwen3-30B-A3B", min_transformers_version="4.57.0.dev0"
|
"Qwen/Qwen3-30B-A3B", min_transformers_version="5.0.0"
|
||||||
),
|
),
|
||||||
"TransformersMoEForSequenceClassification": _HfExamplesInfo(
|
"TransformersMoEForSequenceClassification": _HfExamplesInfo(
|
||||||
"Qwen/Qwen3-30B-A3B", min_transformers_version="4.57.0.dev0"
|
"Qwen/Qwen3-30B-A3B", min_transformers_version="5.0.0"
|
||||||
),
|
),
|
||||||
"TransformersMultiModalEmbeddingModel": _HfExamplesInfo("google/gemma-3-4b-it"),
|
"TransformersMultiModalEmbeddingModel": _HfExamplesInfo("google/gemma-3-4b-it"),
|
||||||
"TransformersMultiModalForSequenceClassification": _HfExamplesInfo(
|
"TransformersMultiModalForSequenceClassification": _HfExamplesInfo(
|
||||||
|
|||||||
@@ -82,7 +82,7 @@ def test_models(
|
|||||||
from packaging.version import Version
|
from packaging.version import Version
|
||||||
|
|
||||||
installed = Version(transformers.__version__)
|
installed = Version(transformers.__version__)
|
||||||
required = Version("4.57.0.dev0")
|
required = Version("5.0.0")
|
||||||
if model == "allenai/OLMoE-1B-7B-0924" and installed < required:
|
if model == "allenai/OLMoE-1B-7B-0924" and installed < required:
|
||||||
pytest.skip(
|
pytest.skip(
|
||||||
"MoE models with the Transformers backend require "
|
"MoE models with the Transformers backend require "
|
||||||
|
|||||||
@@ -49,7 +49,7 @@ from functools import cached_property
|
|||||||
import torch
|
import torch
|
||||||
import torch.nn as nn
|
import torch.nn as nn
|
||||||
import torch.nn.functional as F
|
import torch.nn.functional as F
|
||||||
from transformers.activations import ACT2FN, PytorchGELUTanh
|
from transformers.activations import ACT2FN
|
||||||
from transformers.modeling_utils import PreTrainedModel
|
from transformers.modeling_utils import PreTrainedModel
|
||||||
from transformers.utils import is_flash_attn_2_available
|
from transformers.utils import is_flash_attn_2_available
|
||||||
|
|
||||||
@@ -651,7 +651,7 @@ class MoonVitPretrainedModel(PreTrainedModel):
|
|||||||
"num_heads": config.num_attention_heads,
|
"num_heads": config.num_attention_heads,
|
||||||
"hidden_dim": config.hidden_size,
|
"hidden_dim": config.hidden_size,
|
||||||
"mlp_dim": config.intermediate_size,
|
"mlp_dim": config.intermediate_size,
|
||||||
"activation": PytorchGELUTanh(),
|
"activation": ACT2FN["gelu_pytorch_tanh"],
|
||||||
"attn_bias": True,
|
"attn_bias": True,
|
||||||
"attn_implementation": config._attn_implementation,
|
"attn_implementation": config._attn_implementation,
|
||||||
},
|
},
|
||||||
|
|||||||
@@ -34,7 +34,7 @@ import torch
|
|||||||
import torch.nn as nn
|
import torch.nn as nn
|
||||||
import torch.nn.functional as F
|
import torch.nn.functional as F
|
||||||
from einops import rearrange, repeat
|
from einops import rearrange, repeat
|
||||||
from transformers import AutoConfig, BatchFeature, PretrainedConfig
|
from transformers import BatchFeature, PretrainedConfig
|
||||||
from transformers.models.qwen2_vl import Qwen2VLImageProcessor, Qwen2VLProcessor
|
from transformers.models.qwen2_vl import Qwen2VLImageProcessor, Qwen2VLProcessor
|
||||||
from transformers.models.qwen2_vl.configuration_qwen2_vl import (
|
from transformers.models.qwen2_vl.configuration_qwen2_vl import (
|
||||||
Qwen2VLConfig,
|
Qwen2VLConfig,
|
||||||
@@ -1651,9 +1651,7 @@ class Tarsier2Processor(Qwen2VLProcessor):
|
|||||||
class Tarsier2ProcessingInfo(Qwen2VLProcessingInfo):
|
class Tarsier2ProcessingInfo(Qwen2VLProcessingInfo):
|
||||||
def get_hf_config(self) -> Qwen2VLConfig:
|
def get_hf_config(self) -> Qwen2VLConfig:
|
||||||
model_path = self.ctx.model_config.model
|
model_path = self.ctx.model_config.model
|
||||||
original_config = AutoConfig.from_pretrained(model_path)
|
correct_config = Qwen2VLConfig.from_pretrained(model_path)
|
||||||
config_dict = original_config.to_dict()
|
|
||||||
correct_config = Qwen2VLConfig.from_dict(config_dict)
|
|
||||||
|
|
||||||
return correct_config
|
return correct_config
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user