[Model] support new model ovis2.5 (#23084)
Signed-off-by: myselvess <244285088@qq.com> Signed-off-by: Isotr0py <mozf@mail2.sysu.edu.cn> Co-authored-by: Isotr0py <2037008807@qq.com> Co-authored-by: Isotr0py <mozf@mail2.sysu.edu.cn>
This commit is contained in:
@@ -11,6 +11,7 @@ from pathlib import PosixPath
|
||||
import pytest
|
||||
from transformers import (AutoModel, AutoModelForImageTextToText,
|
||||
AutoModelForTextToWaveform, AutoModelForVision2Seq)
|
||||
from transformers.utils import is_flash_attn_2_available
|
||||
|
||||
from vllm.platforms import current_platform
|
||||
from vllm.utils import identity
|
||||
@@ -621,6 +622,26 @@ VLM_TEST_SETTINGS = {
|
||||
hf_model_kwargs={"llm_attn_implementation": "sdpa"},
|
||||
patch_hf_runner=model_utils.ovis_patch_hf_runner,
|
||||
),
|
||||
"ovis2_5": VLMTestInfo(
|
||||
models=["AIDC-AI/Ovis2.5-2B"],
|
||||
test_type=(
|
||||
VLMTestType.IMAGE,
|
||||
VLMTestType.MULTI_IMAGE,
|
||||
VLMTestType.VIDEO
|
||||
),
|
||||
prompt_formatter=lambda img_prompt: f"<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n<|im_start|>user\n{img_prompt}<|im_end|>\n<|im_start|>assistant\n", # noqa: E501
|
||||
img_idx_to_prompt=lambda idx: "<image>\n", # noqa: E501
|
||||
video_idx_to_prompt=lambda idx: "<video>\n",
|
||||
max_model_len=4096,
|
||||
max_num_seqs=2,
|
||||
dtype="half",
|
||||
num_logprobs=10,
|
||||
patch_hf_runner=model_utils.ovis2_5_patch_hf_runner,
|
||||
marks=[pytest.mark.skipif(
|
||||
not is_flash_attn_2_available(),
|
||||
reason="HF model needs `flash_attn` installed"
|
||||
)],
|
||||
),
|
||||
"phi3v": VLMTestInfo(
|
||||
models=["microsoft/Phi-3.5-vision-instruct"],
|
||||
test_type=(VLMTestType.IMAGE, VLMTestType.MULTI_IMAGE),
|
||||
|
||||
@@ -10,6 +10,7 @@ from typing import Optional, Union
|
||||
|
||||
import numpy as np
|
||||
import numpy.typing as npt
|
||||
import PIL.Image
|
||||
import pytest
|
||||
import regex as re
|
||||
import torch
|
||||
@@ -810,6 +811,63 @@ def ovis_patch_hf_runner(hf_model: HfRunner) -> HfRunner:
|
||||
return hf_model
|
||||
|
||||
|
||||
def ovis2_5_patch_hf_runner(hf_model: HfRunner) -> HfRunner:
    """Patches and returns an instance of the HfRunner to use for Ovis2.5.

    Two attributes of ``hf_model`` are replaced in place:

    * ``hf_model.model.get_output_embeddings`` is pointed at the output
      embeddings of the wrapped language model (``hf_model.model.llm``).
    * ``hf_model.processor`` is replaced with a function that builds a
      single-turn chat message from the prompt, images and videos, and
      runs it through ``hf_model.model.preprocess_inputs``.

    Args:
        hf_model: The runner to patch.

    Returns:
        The same ``hf_model`` instance, after patching.
    """
    # Delegate to the inner language model's output embeddings.
    hf_model.model.get_output_embeddings = lambda: \
        hf_model.model.llm.get_output_embeddings()

    # (start, end) markers delimiting the user turn in each chat template
    # the incoming prompt may have been formatted with. Order matters: the
    # first template whose markers are both present wins.
    user_turn_markers = (
        # qwen2
        ("<|im_start|>user\n", "<|im_end|>\n"),
        # llama
        ("<|start_header_id|>user<|end_header_id|>\n\n", "<|eot_id|>"),
        # gemma2
        ("<start_of_turn>user\n", "<end_of_turn>\n"),
    )

    def processor(*args, text="", images=None, videos=None, **kwargs):
        """Build model inputs from a formatted prompt plus media.

        Extra positional and keyword arguments are accepted but unused.
        """
        # Normalize images to a list; a lone image is wrapped.
        if images is None:
            images = []
        elif isinstance(images, Image):
            images = [images]

        # Normalize videos to a list of frame lists; a lone array is
        # wrapped, and every frame is converted to a PIL image.
        if videos is None:
            videos = []
        else:
            if isinstance(videos, np.ndarray):
                videos = [videos]
            videos = [[PIL.Image.fromarray(frame) for frame in vid]
                      for vid in videos]

        # Strip the chat template: keep only the text between the user
        # turn markers, since the messages below are re-templated by
        # `preprocess_inputs`.
        for start, end in user_turn_markers:
            if start in text and end in text:
                text = text.split(start)[1].split(end)[0]
                break

        images_message = [{"type": "image", "image": img} for img in images]
        videos_message = [{"type": "video", "video": vid} for vid in videos]

        # One user message: all images, then all videos, then the text.
        messages = [{
            "role": "user",
            "content": [
                *images_message,
                *videos_message,
                {
                    "type": "text",
                    "text": text
                },
            ],
        }]

        input_ids, pixel_values, grid_thws = hf_model.model.preprocess_inputs(
            messages=messages, enable_thinking=True)
        inputs = {
            "inputs": input_ids,
            "pixel_values": pixel_values,
            "grid_thws": grid_thws,
        }
        return BatchFeature(data=inputs, tensor_type="pt")

    hf_model.processor = processor
    return hf_model
|
||||
|
||||
|
||||
def qwen2_5_omni_patch_hf_runner(hf_model: HfRunner) -> HfRunner:
|
||||
"""Patches and returns an instance of the HfRunner for Qwen2.5-Omni."""
|
||||
thinker = hf_model.model.thinker
|
||||
|
||||
Reference in New Issue
Block a user