[Misc] Consolidate Audio tests into multimodal common generation tests (#18214)

Signed-off-by: Isotr0py <2037008807@qq.com>
This commit is contained in:
Isotr0py
2025-05-16 17:18:08 +08:00
committed by GitHub
parent 541817670c
commit 390ec88905
9 changed files with 282 additions and 215 deletions

View File

@@ -4,8 +4,8 @@ types / modalities.
"""
from pathlib import PosixPath
from .....conftest import (HfRunner, ImageTestAssets, VideoTestAssets,
VllmRunner)
from .....conftest import (AudioTestAssets, HfRunner, ImageTestAssets,
VideoTestAssets, VllmRunner)
from . import builders, core
from .types import ExpandableVLMTestArgs, VLMTestInfo
@@ -30,7 +30,6 @@ def run_single_image_test(*, tmp_path: PosixPath, model_test_info: VLMTestInfo,
num_logprobs=test_case.num_logprobs,
limit_mm_per_prompt={"image": 1},
distributed_executor_backend=test_case.distributed_executor_backend,
runner_mm_key="images",
**model_test_info.get_non_parametrized_runner_kwargs())
@@ -53,7 +52,6 @@ def run_multi_image_test(*, tmp_path: PosixPath, model_test_info: VLMTestInfo,
num_logprobs=test_case.num_logprobs,
limit_mm_per_prompt={"image": len(image_assets)},
distributed_executor_backend=test_case.distributed_executor_backend,
runner_mm_key="images",
**model_test_info.get_non_parametrized_runner_kwargs())
@@ -77,7 +75,6 @@ def run_embedding_test(*, model_test_info: VLMTestInfo,
limit_mm_per_prompt={"image": 1},
vllm_embeddings=vllm_embeddings,
distributed_executor_backend=test_case.distributed_executor_backend,
runner_mm_key="images",
**model_test_info.get_non_parametrized_runner_kwargs())
@@ -105,7 +102,30 @@ def run_video_test(
num_logprobs=test_case.num_logprobs,
limit_mm_per_prompt={"video": len(video_assets)},
distributed_executor_backend=test_case.distributed_executor_backend,
runner_mm_key="videos",
**model_test_info.get_non_parametrized_runner_kwargs())
def run_audio_test(
*,
model_test_info: VLMTestInfo,
test_case: ExpandableVLMTestArgs,
hf_runner: type[HfRunner],
vllm_runner: type[VllmRunner],
audio_assets: AudioTestAssets,
):
inputs = builders.build_audio_inputs_from_test_info(
model_test_info, audio_assets)
core.run_test(
hf_runner=hf_runner,
vllm_runner=vllm_runner,
inputs=inputs,
model=test_case.model,
dtype=test_case.dtype,
max_tokens=test_case.max_tokens,
num_logprobs=test_case.num_logprobs,
limit_mm_per_prompt={"audio": 1},
distributed_executor_backend=test_case.distributed_executor_backend,
**model_test_info.get_non_parametrized_runner_kwargs())
@@ -120,11 +140,9 @@ def run_custom_inputs_test(*, model_test_info: VLMTestInfo,
inputs = test_case.custom_test_opts.inputs
limit_mm_per_prompt = test_case.custom_test_opts.limit_mm_per_prompt
runner_mm_key = test_case.custom_test_opts.runner_mm_key
# Inputs, limit_mm_per_prompt, and runner_mm_key should all be set
# Inputs and limit_mm_per_prompt should all be set
assert inputs is not None
assert limit_mm_per_prompt is not None
assert runner_mm_key is not None
core.run_test(
hf_runner=hf_runner,
@@ -136,5 +154,4 @@ def run_custom_inputs_test(*, model_test_info: VLMTestInfo,
num_logprobs=test_case.num_logprobs,
limit_mm_per_prompt=limit_mm_per_prompt,
distributed_executor_backend=test_case.distributed_executor_backend,
runner_mm_key=runner_mm_key,
**model_test_info.get_non_parametrized_runner_kwargs())