[CI/Build][Bugfix] Ensure compatibility with transformers 4.52 (#18678)
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
@@ -226,6 +226,8 @@ VLM_TEST_SETTINGS = {
         img_idx_to_prompt=lambda idx: "",
         auto_cls=AutoModelForImageTextToText,
         vllm_output_post_proc=model_utils.blip2_vllm_to_hf_output,
+        # FIXME: https://github.com/huggingface/transformers/pull/38510
+        marks=[pytest.mark.skip("Model is broken")],
     ),
     "chameleon": VLMTestInfo(
         models=["facebook/chameleon-7b"],
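
The new skip is unconditional, so the test stays disabled even after the upstream fix lands. A version-gated variant is sketched below, assuming the breakage starts at transformers 4.52 (the threshold and mark layout are illustrative, not part of this change):

```python
import pytest
import transformers
from packaging.version import Version

# Skip only on the transformers releases assumed to be affected by
# https://github.com/huggingface/transformers/pull/38510 (>=4.52 here).
BLIP2_BROKEN = Version(transformers.__version__) >= Version("4.52.0")

marks = [
    pytest.mark.skipif(BLIP2_BROKEN,
                       reason="blip2 HF impl is broken in transformers>=4.52"),
]
```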
@@ -281,10 +283,10 @@ VLM_TEST_SETTINGS = {
         multi_image_prompt="<start_of_image><start_of_image>Describe the two images in detail.", # noqa: E501
         max_model_len=4096,
         max_num_seqs=2,
         dtype="bfloat16",
         auto_cls=AutoModelForImageTextToText,
         vllm_runner_kwargs={"mm_processor_kwargs": {"do_pan_and_scan": True}},
         patch_hf_runner=model_utils.gemma3_patch_hf_runner,
         num_logprobs=10,
     ),
     "glm4v": VLMTestInfo(
         models=["THUDM/glm-4v-9b"],
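
For reference, `vllm_runner_kwargs` above are forwarded to the vLLM engine constructor. A standalone equivalent is sketched below (the model name is an assumption based on the gemma3 entry; it is not shown in this hunk):

```python
from vllm import LLM

# Enable pan-and-scan image preprocessing via mm_processor_kwargs,
# mirroring the vllm_runner_kwargs in the test entry above.
llm = LLM(
    model="google/gemma-3-4b-it",  # assumed checkpoint for the gemma3 entry
    max_model_len=4096,
    max_num_seqs=2,
    dtype="bfloat16",
    mm_processor_kwargs={"do_pan_and_scan": True},
)
```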
@@ -337,7 +339,8 @@ VLM_TEST_SETTINGS = {
         models=[
             "OpenGVLab/InternVL2-1B",
             "OpenGVLab/InternVL2-2B",
-            "OpenGVLab/Mono-InternVL-2B",
+            # FIXME: Config cannot be loaded in transformers 4.52
+            # "OpenGVLab/Mono-InternVL-2B",
         ],
         test_type=(VLMTestType.IMAGE, VLMTestType.MULTI_IMAGE),
         prompt_formatter=lambda img_prompt: f"<|im_start|>User\n{img_prompt}<|im_end|>\n<|im_start|>Assistant\n", # noqa: E501
@@ -568,6 +571,8 @@ VLM_TEST_SETTINGS = {
         max_num_seqs=2,
         vllm_output_post_proc=model_utils.qwen_vllm_to_hf_output,
         prompt_path_encoder=model_utils.qwen_prompt_path_encoder,
+        # FIXME: https://github.com/huggingface/transformers/issues/38358
+        marks=[pytest.mark.skip("Model initialization fails")],
     ),
     "qwen2_vl": VLMTestInfo(
         models=["Qwen/Qwen2-VL-2B-Instruct"],

@@ -100,6 +100,8 @@ def run_test(
     )


+# FIXME: https://github.com/huggingface/transformers/issues/38358
+@pytest.mark.skip("Model initialization fails")
 @pytest.mark.core_model
 @pytest.mark.parametrize("model", MODELS)
 @pytest.mark.parametrize(
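
Skipping at the function level disables every parametrized case. If only some checkpoints in `MODELS` were broken, `pytest.param` marks would allow a per-model skip instead; a sketch with illustrative model names:

```python
import pytest

MODELS = [
    # Hypothetical per-model skip: exclude only the checkpoint whose
    # initialization fails, keeping the remaining models under test.
    pytest.param("Qwen/Qwen-VL",
                 marks=pytest.mark.skip(reason="Model initialization fails")),
    "Qwen/Qwen-VL-Chat",
]


@pytest.mark.parametrize("model", MODELS)
def test_models(model: str) -> None:
    ...
```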
@@ -29,7 +29,7 @@ def vllm_to_hf_output(
     return output_ids, hf_output_str, out_logprobs


-MODEL_NAME = "ibm-granite/granite-speech-3.3-8b"
+MODEL_NAME = "ibm-granite/granite-speech-3.3-2b"
 # Audio lora co-exists directly in the model directory, but
 # currently still needs to be passed directly to vLLM.
 audio_lora_path = MODEL_NAME

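As the comment notes, the adapter ships inside the model repository but vLLM must still be pointed at it explicitly. A minimal sketch of that wiring with vLLM's LoRA API (the prompt text is illustrative, and audio inputs are omitted for brevity):

```python
from vllm import LLM, SamplingParams
from vllm.lora.request import LoRARequest

MODEL_NAME = "ibm-granite/granite-speech-3.3-2b"
audio_lora_path = MODEL_NAME  # the adapter lives in the model directory itself

llm = LLM(model=MODEL_NAME, enable_lora=True)
outputs = llm.generate(
    "<|audio|>can you transcribe the speech into text?",  # illustrative prompt
    SamplingParams(max_tokens=64),
    # Adapter name and ID are arbitrary; the path points at the model repo.
    lora_request=LoRARequest("audio", 1, audio_lora_path),
)
```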
@@ -122,6 +122,10 @@ def run_test(
         for prompts, images, audios in inputs
     ]

+    # This error occurs inside `get_peft_model`
+    # FIXME: https://huggingface.co/microsoft/Phi-4-multimodal-instruct/discussions/75
+    pytest.skip("HF impl is not compatible with current transformers")
+
     hf_model_kwargs = {"_attn_implementation": "sdpa"}
     with hf_runner(model, dtype=dtype,
                    model_kwargs=hf_model_kwargs) as hf_model:

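`_attn_implementation` is the private config field that HfRunner's `model_kwargs` reach into; when loading a model directly, the public `attn_implementation` argument has the same effect. A sketch (loading this checkpoint requires trust_remote_code):

```python
from transformers import AutoModelForCausalLM

# Force SDPA attention, matching hf_model_kwargs above.
model = AutoModelForCausalLM.from_pretrained(
    "microsoft/Phi-4-multimodal-instruct",
    attn_implementation="sdpa",
    trust_remote_code=True,
)
```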
@@ -10,11 +10,12 @@ from typing import Optional, Union

 import numpy as np
 import numpy.typing as npt
 import pytest
+import regex as re
 import torch
 from PIL.Image import Image
 from transformers import (AutoConfig, AutoTokenizer, BatchFeature,
-                          GenerationConfig)
+                          GenerationConfig, GenerationMixin)

 from vllm.sequence import SampleLogprobs
 from vllm.transformers_utils.tokenizer import patch_padding_side
@@ -324,6 +325,16 @@ def gemma3_patch_hf_runner(hf_model: HfRunner) -> HfRunner:

     hf_model.processor = processor

+    orig_generate = hf_model.model.generate
+
+    def _generate(self, *args, **kwargs):
+        # FIXME: https://github.com/huggingface/transformers/issues/38333
+        kwargs["disable_compile"] = True
+
+        return orig_generate(*args, **kwargs)
+
+    hf_model.model.generate = types.MethodType(_generate, hf_model.model)
+
     return hf_model

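The monkeypatch is needed because HfRunner calls `generate` internally; when calling `generate` yourself, the flag can simply be passed through, since extra kwargs are folded into the GenerationConfig. A sketch (the checkpoint and inputs are illustrative):

```python
import torch
from PIL import Image
from transformers import AutoModelForImageTextToText, AutoProcessor

model_id = "google/gemma-3-4b-it"  # illustrative checkpoint
model = AutoModelForImageTextToText.from_pretrained(model_id,
                                                    torch_dtype=torch.bfloat16)
processor = AutoProcessor.from_pretrained(model_id)

image = Image.new("RGB", (224, 224))  # placeholder image
inputs = processor(text="<start_of_image>Describe the image.",
                   images=image, return_tensors="pt")

# disable_compile turns off torch.compile during decoding, working around
# https://github.com/huggingface/transformers/issues/38333.
output_ids = model.generate(**inputs, max_new_tokens=32, disable_compile=True)
```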
@@ -610,6 +621,11 @@ def _internvl_generate(
     if getattr(self, "use_visual_token_mask", False):
         visual_token_mask = selected.reshape(B, N, 1).to(input_embeds.dtype)
         forward_kwargs["visual_token_mask"] = visual_token_mask
+
+    # e.g. InternVL2-2B
+    if not isinstance(self.language_model, GenerationMixin):
+        pytest.skip("HF impl is not compatible with current transformers")

     outputs = self.language_model.generate(
         **forward_kwargs,
         **generate_kwargs,
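
Recent transformers releases require models to inherit `GenerationMixin` explicitly rather than picking up `generate()` implicitly from `PreTrainedModel`, which is why some remote-code language models (such as Mono-InternVL's) no longer support generation. A standalone form of the guard above:

```python
from transformers import GenerationMixin, PreTrainedModel


def supports_generation(model: PreTrainedModel) -> bool:
    # generate() is only reliable on models that explicitly inherit
    # GenerationMixin; remote-code models relying on the old implicit
    # inheritance fail this check.
    return isinstance(model, GenerationMixin)
```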