[Misc] Consolidate Audio tests into multimodal common generation tests (#18214)
Signed-off-by: Isotr0py <2037008807@qq.com>
This commit is contained in:
@@ -12,7 +12,7 @@ from vllm.multimodal.video import (rescale_video_size, resize_video,
|
||||
|
||||
from .....conftest import IMAGE_ASSETS, VIDEO_ASSETS
|
||||
from .builders import build_multi_image_inputs, build_single_image_inputs
|
||||
from .types import ImageSizeWrapper, SizeType
|
||||
from .types import ImageSizeWrapper, PromptWithMultiModalInput, SizeType
|
||||
|
||||
|
||||
def multi_image_multi_aspect_ratio_inputs(formatter: Callable[[str], str]):
|
||||
@@ -32,24 +32,28 @@ def multi_image_multi_aspect_ratio_inputs(formatter: Callable[[str], str]):
|
||||
"<image>\nWhat is the season?",
|
||||
]
|
||||
formatted_prompts = [formatter(prompt) for prompt in img_prompts]
|
||||
|
||||
return [(
|
||||
formatted_prompts,
|
||||
aspect_ratio_images = [
|
||||
[stop_sign, cherry_blossom],
|
||||
# Images with different sizes and aspect-ratios
|
||||
[
|
||||
[stop_sign, cherry_blossom],
|
||||
# Images with different sizes and aspect-ratios
|
||||
[
|
||||
rescale_image_size(stop_sign, 0.1),
|
||||
stop_sign,
|
||||
],
|
||||
[
|
||||
stop_sign,
|
||||
rescale_image_size(stop_sign, 0.25),
|
||||
cherry_blossom.resize((183, 488)),
|
||||
cherry_blossom.resize((488, 183))
|
||||
],
|
||||
cherry_blossom,
|
||||
])]
|
||||
rescale_image_size(stop_sign, 0.1),
|
||||
stop_sign,
|
||||
],
|
||||
[
|
||||
stop_sign,
|
||||
rescale_image_size(stop_sign, 0.25),
|
||||
cherry_blossom.resize((183, 488)),
|
||||
cherry_blossom.resize((488, 183))
|
||||
],
|
||||
cherry_blossom,
|
||||
]
|
||||
|
||||
return [
|
||||
PromptWithMultiModalInput(
|
||||
prompts=formatted_prompts,
|
||||
image_data=aspect_ratio_images,
|
||||
)
|
||||
]
|
||||
|
||||
|
||||
def multi_video_multi_aspect_ratio_inputs(formatter: Callable[[str], str],
|
||||
@@ -68,24 +72,28 @@ def multi_video_multi_aspect_ratio_inputs(formatter: Callable[[str], str],
|
||||
"<video>\nWhy is this video funny?",
|
||||
]
|
||||
formatted_prompts = [formatter(prompt) for prompt in video_prompts]
|
||||
|
||||
return [(
|
||||
formatted_prompts,
|
||||
aspect_ratio_videos = [
|
||||
[video, video],
|
||||
# Videos with different sizes and aspect-ratios
|
||||
[
|
||||
[video, video],
|
||||
# Videos with different sizes and aspect-ratios
|
||||
[
|
||||
rescale_video_size(video, 0.1),
|
||||
video,
|
||||
],
|
||||
[
|
||||
video,
|
||||
rescale_video_size(video, 0.25),
|
||||
resize_video(video, (183, 488)),
|
||||
resize_video(video, (488, 183))
|
||||
],
|
||||
rescale_video_size(video, 0.1),
|
||||
video,
|
||||
])]
|
||||
],
|
||||
[
|
||||
video,
|
||||
rescale_video_size(video, 0.25),
|
||||
resize_video(video, (183, 488)),
|
||||
resize_video(video, (488, 183))
|
||||
],
|
||||
video,
|
||||
]
|
||||
|
||||
return [
|
||||
PromptWithMultiModalInput(
|
||||
prompts=formatted_prompts,
|
||||
video_data=aspect_ratio_videos,
|
||||
)
|
||||
]
|
||||
|
||||
|
||||
def different_patch_input_cases_internvl():
|
||||
|
||||
Reference in New Issue
Block a user