[CI/Build] Use AutoModelForImageTextToText to load VLMs in tests (#14945)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in:
Cyrus Leung
2025-03-18 02:35:17 +08:00
committed by GitHub
parent 5340b0e221
commit b89fb2a4a1
3 changed files with 19 additions and 19 deletions

View File

@@ -4,8 +4,8 @@ from typing import Optional, overload
import pytest
import torch
-from transformers import (AutoConfig, AutoModelForVision2Seq, AutoTokenizer,
-BatchEncoding)
+from transformers import (AutoConfig, AutoModelForImageTextToText,
+AutoTokenizer, BatchEncoding)
from vllm import LLM, SamplingParams
from vllm.attention.backends.flash_attn import FlashAttentionMetadata
@@ -234,7 +234,7 @@ def _run_test(
dtype=dtype,
model_kwargs={"device_map": "auto"},
postprocess_inputs=process,
-auto_cls=AutoModelForVision2Seq) as hf_model:
+auto_cls=AutoModelForImageTextToText) as hf_model:
hf_outputs_per_image = [
hf_model.generate_greedy_logprobs_limit(prompts,
max_tokens,