# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
"""Common tests for testing .generate() functionality for single / multiple
image, embedding, and video support for different VLMs in vLLM.
"""

import math
from collections import defaultdict
from pathlib import PosixPath

import pytest
from packaging.version import Version
from transformers import (
    AutoModel,
    AutoModelForCausalLM,
    AutoModelForImageTextToText,
    AutoModelForTextToWaveform,
)
from transformers import __version__ as TRANSFORMERS_VERSION

from vllm.platforms import current_platform
from vllm.utils.func_utils import identity

from ....conftest import (
    IMAGE_ASSETS,
    AudioTestAssets,
    HfRunner,
    ImageTestAssets,
    VideoTestAssets,
    VllmRunner,
)
from ....utils import create_new_process_for_each_test, large_gpu_mark, multi_gpu_marks
from ...utils import check_outputs_equal
from .vlm_utils import custom_inputs, model_utils, runners
from .vlm_utils.case_filtering import get_parametrized_options
from .vlm_utils.types import (
    CustomTestOptions,
    ExpandableVLMTestArgs,
    VLMTestInfo,
    VLMTestType,
)

COMMON_BROADCAST_SETTINGS = {
    "test_type": VLMTestType.IMAGE,
    "dtype": "half",
    "max_tokens": 5,
    "tensor_parallel_size": 2,
    "hf_model_kwargs": {"device_map": "auto"},
    "image_size_factors": [(0.25, 0.5, 1.0)],
    "distributed_executor_backend": (
        "ray",
        "mp",
    ),
}
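
# How these settings are consumed (a minimal sketch based on the "*-broadcast"
# entries near the end of VLM_TEST_SETTINGS below): each broadcast entry
# unpacks this dict on top of its model-specific fields, e.g.
#
#   "llava-broadcast": VLMTestInfo(
#       models=["llava-hf/llava-1.5-7b-hf"],
#       prompt_formatter=lambda img_prompt: f"USER: {img_prompt}\nASSISTANT:",
#       ...,
#       **COMMON_BROADCAST_SETTINGS,
#   )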
### Test configuration for specific models
# NOTE: The convention of the test settings below is to lead each test key
# with the name of the model arch used in the test, using underscores in place
# of hyphens; this makes it more convenient to filter tests for a specific kind
# of model. For example....
#
# To run all test types for a specific key:
# use the k flag to substring match with a leading square bracket; if the
# model arch happens to be a substring of another one, you can add a
# trailing hyphen. E.g.,
# - pytest $TEST_FILE -k "[llava-"
# prevents matching on "[llava_next-" & will match just the enabled cases
# for llava, i.e., single image, image embedding, and custom input tests.
#
# To run a test for a Test Info for just one of multiple models:
# use the k flag to substring match the model name, e.g.,
# - pytest $TEST_FILE -k OpenGVLab/InternVL2-1B
# prevents matching on OpenGVLab/InternVL2-2B.
#
# You can also combine substrings to match more granularly.
# ex 1:
# pytest $TEST_FILE -k "test_single_image and OpenGVLab/InternVL2-1B"
# will run only test_single_image* for OpenGVLab/InternVL2-1B; this would
# match both wrappers for single image tests, since it also matches
# test_single_image_heavy (which forks if we have a distributed backend)
# ex 2:
# pytest $TEST_FILE -k "[llava- or [intern_vl-"
# will run all of the tests for only llava & internvl.
#
# NOTE you can add --collect-only to any of the above commands to see
# which cases would be selected and deselected by pytest. In general,
# this is a good idea for checking your command first, since tests are slow.
VLM_TEST_SETTINGS = {
    #### Core tests to always run in the CI
    "llava": VLMTestInfo(
        models=["llava-hf/llava-1.5-7b-hf"],
        test_type=(VLMTestType.EMBEDDING, VLMTestType.IMAGE, VLMTestType.CUSTOM_INPUTS),
        prompt_formatter=lambda img_prompt: f"USER: {img_prompt}\nASSISTANT:",
        convert_assets_to_embeddings=model_utils.get_llava_embeddings,
        max_model_len=4096,
        auto_cls=AutoModelForImageTextToText,
        vllm_output_post_proc=model_utils.llava_image_vllm_to_hf_output,
        custom_test_opts=[
            CustomTestOptions(
                inputs=custom_inputs.multi_image_multi_aspect_ratio_inputs(
                    formatter=lambda img_prompt: f"USER: {img_prompt}\nASSISTANT:"
                ),
                limit_mm_per_prompt={"image": 4},
            )
        ],
        vllm_runner_kwargs={"enable_mm_embeds": True},
        marks=[pytest.mark.core_model, pytest.mark.cpu_model],
    ),
    "paligemma": VLMTestInfo(
        models=["google/paligemma-3b-mix-224"],
        test_type=VLMTestType.IMAGE,
        prompt_formatter=identity,
        img_idx_to_prompt=lambda idx: "",
        # Paligemma uses its own sample prompts because the default one fails
        single_image_prompts=IMAGE_ASSETS.prompts(
            {
                "stop_sign": "caption es",
                "cherry_blossom": "What is in the picture?",
            }
        ),
        auto_cls=AutoModelForImageTextToText,
        vllm_output_post_proc=model_utils.paligemma_vllm_to_hf_output,
        dtype="bfloat16",
        marks=[
            pytest.mark.skip(reason="vLLM does not support PrefixLM attention mask")
        ],
    ),
    "qwen2_5_vl": VLMTestInfo(
        models=["Qwen/Qwen2.5-VL-3B-Instruct"],
        test_type=(VLMTestType.IMAGE, VLMTestType.MULTI_IMAGE, VLMTestType.VIDEO),
        prompt_formatter=lambda img_prompt: f"<|im_start|>User\n{img_prompt}<|im_end|>\n<|im_start|>assistant\n",  # noqa: E501
        img_idx_to_prompt=lambda idx: "<|vision_start|><|image_pad|><|vision_end|>",
        video_idx_to_prompt=lambda idx: "<|vision_start|><|video_pad|><|vision_end|>",
        enforce_eager=False,
        max_model_len=4096,
        max_num_seqs=2,
        auto_cls=AutoModelForImageTextToText,
        vllm_output_post_proc=model_utils.qwen2_vllm_to_hf_output,
        image_size_factors=[(), (0.25,), (0.25, 0.25, 0.25), (0.25, 0.2, 0.15)],
        marks=[pytest.mark.core_model, pytest.mark.cpu_model],
    ),
    "qwen2_5_omni": VLMTestInfo(
        models=["Qwen/Qwen2.5-Omni-3B"],
        test_type=(VLMTestType.IMAGE, VLMTestType.MULTI_IMAGE, VLMTestType.VIDEO),
        prompt_formatter=lambda img_prompt: f"<|im_start|>User\n{img_prompt}<|im_end|>\n<|im_start|>assistant\n",  # noqa: E501
        img_idx_to_prompt=lambda idx: "<|vision_bos|><|IMAGE|><|vision_eos|>",
        video_idx_to_prompt=lambda idx: "<|vision_bos|><|VIDEO|><|vision_eos|>",
        max_model_len=4096,
        max_num_seqs=2,
        num_logprobs=6 if current_platform.is_cpu() else 5,
        auto_cls=AutoModelForTextToWaveform,
        vllm_output_post_proc=model_utils.qwen2_vllm_to_hf_output,
        patch_hf_runner=model_utils.qwen2_5_omni_patch_hf_runner,
        image_size_factors=[(), (0.25,), (0.25, 0.25, 0.25), (0.25, 0.2, 0.15)],
        marks=[pytest.mark.core_model, pytest.mark.cpu_model],
    ),
    "qwen3_vl": VLMTestInfo(
        models=["Qwen/Qwen3-VL-4B-Instruct"],
        test_type=(
            VLMTestType.IMAGE,
            VLMTestType.MULTI_IMAGE,
            VLMTestType.VIDEO,
        ),
        enforce_eager=False,
        needs_video_metadata=True,
        prompt_formatter=lambda img_prompt: f"<|im_start|>User\n{img_prompt}<|im_end|>\n<|im_start|>assistant\n",  # noqa: E501
        img_idx_to_prompt=lambda idx: "<|vision_start|><|image_pad|><|vision_end|>",  # noqa: E501
        video_idx_to_prompt=lambda idx: "<|vision_start|><|video_pad|><|vision_end|>",  # noqa: E501
        max_model_len=4096,
        max_num_seqs=2,
        num_logprobs=20,
        auto_cls=AutoModelForImageTextToText,
        vllm_output_post_proc=model_utils.qwen2_vllm_to_hf_output,
        patch_hf_runner=model_utils.qwen3_vl_patch_hf_runner,
        image_size_factors=[(), (0.25,), (0.25, 0.25, 0.25), (0.25, 0.2, 0.15)],
        marks=[
            pytest.mark.core_model,
        ],
    ),
    "ultravox": VLMTestInfo(
        models=["fixie-ai/ultravox-v0_5-llama-3_2-1b"],
        test_type=VLMTestType.AUDIO,
        prompt_formatter=lambda audio_prompt: f"<|begin_of_text|><|start_header_id|>user<|end_header_id|>\n\n{audio_prompt}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n",  # noqa: E501
        audio_idx_to_prompt=lambda idx: "<|audio|>",
        max_model_len=4096,
        max_num_seqs=2,
        auto_cls=AutoModel,
        hf_output_post_proc=model_utils.ultravox_trunc_hf_output,
        marks=[pytest.mark.core_model, pytest.mark.cpu_model],
    ),
    #### Transformers fallback to test
    ## To reduce test burden, we only test batching arbitrary image size
    # Dynamic image length and number of patches
    "llava-onevision-transformers": VLMTestInfo(
        models=["llava-hf/llava-onevision-qwen2-0.5b-ov-hf"],
        test_type=VLMTestType.IMAGE,
        prompt_formatter=lambda vid_prompt: f"<|im_start|>user\n{vid_prompt}<|im_end|>\n<|im_start|>assistant\n",  # noqa: E501
        max_model_len=16384,
        hf_model_kwargs=model_utils.llava_onevision_hf_model_kwargs(
            "llava-hf/llava-onevision-qwen2-0.5b-ov-hf"
        ),
        auto_cls=AutoModelForImageTextToText,
        vllm_output_post_proc=model_utils.llava_onevision_vllm_to_hf_output,
        image_size_factors=[(0.25, 0.5, 1.0)],
        vllm_runner_kwargs={
            "model_impl": "transformers",
            "default_torch_num_threads": 1,
        },
        # FIXME: Investigate why the test hangs
        # when processing the 3rd prompt in vLLM
        marks=[pytest.mark.core_model, pytest.mark.skip(reason="Test hangs")],
    ),
    # Gemma3 has bidirectional mask on images
    "gemma3-transformers": VLMTestInfo(
        models=["google/gemma-3-4b-it"],
        test_type=VLMTestType.IMAGE,
        prompt_formatter=lambda vid_prompt: f"<bos><start_of_turn>user\n{vid_prompt}<start_of_image><end_of_turn>\n<start_of_turn>model\n",  # noqa: E501
        max_model_len=4096,
        auto_cls=AutoModelForImageTextToText,
        vllm_output_post_proc=model_utils.gemma3_vllm_to_hf_output,
        image_size_factors=[(0.25, 0.5, 1.0)],
        vllm_runner_kwargs={
            "model_impl": "transformers",
        },
        marks=[pytest.mark.core_model],
    ),
    "idefics3-transformers": VLMTestInfo(
        models=["HuggingFaceTB/SmolVLM-256M-Instruct"],
        test_type=(VLMTestType.IMAGE, VLMTestType.MULTI_IMAGE),
        prompt_formatter=lambda img_prompt: f"<|begin_of_text|>User:{img_prompt}<end_of_utterance>\nAssistant:",  # noqa: E501
        img_idx_to_prompt=lambda idx: "<image>",
        max_model_len=8192,
        max_num_seqs=2,
        auto_cls=AutoModelForImageTextToText,
        hf_output_post_proc=model_utils.idefics3_trunc_hf_output,
        image_size_factors=[(0.25, 0.5, 1.0)],
        vllm_runner_kwargs={
            "model_impl": "transformers",
        },
        marks=[pytest.mark.core_model],
    ),
    # Pixel values from processor are not 4D or 5D arrays
    "qwen2_5_vl-transformers": VLMTestInfo(
        models=["Qwen/Qwen2.5-VL-3B-Instruct"],
        test_type=VLMTestType.IMAGE,
        prompt_formatter=lambda img_prompt: f"<|im_start|>User\n{img_prompt}<|im_end|>\n<|im_start|>assistant\n",  # noqa: E501
        img_idx_to_prompt=lambda idx: "<|vision_start|><|image_pad|><|vision_end|>",
        max_model_len=4096,
        max_num_seqs=2,
        auto_cls=AutoModelForImageTextToText,
        vllm_output_post_proc=model_utils.qwen2_vllm_to_hf_output,
        image_size_factors=[(0.25, 0.2, 0.15)],
        vllm_runner_kwargs={
            "model_impl": "transformers",
        },
        marks=[large_gpu_mark(min_gb=32)],
    ),
    #### Extended model tests
    "aria": VLMTestInfo(
        models=["rhymes-ai/Aria"],
        test_type=(VLMTestType.IMAGE, VLMTestType.MULTI_IMAGE),
        prompt_formatter=lambda img_prompt: f"<|im_start|>user\n{img_prompt}<|im_end|>\n<|im_start|>assistant\n",  # noqa: E501
        img_idx_to_prompt=lambda idx: "<fim_prefix><|img|><fim_suffix>\n",
        max_model_len=4096,
        max_num_seqs=2,
        auto_cls=AutoModelForImageTextToText,
        single_image_prompts=IMAGE_ASSETS.prompts(
            {
                "stop_sign": "<vlm_image>Please describe the image shortly.",
                "cherry_blossom": "<vlm_image>Please infer the season with reason.",
            }
        ),
        multi_image_prompt="<vlm_image><vlm_image>Describe the two images shortly.",
        stop_str=["<|im_end|>"],
        image_size_factors=[(0.10, 0.15)],
        max_tokens=64,
        marks=[large_gpu_mark(min_gb=64)],
    ),
    "aya_vision": VLMTestInfo(
        models=["CohereForAI/aya-vision-8b"],
        test_type=(VLMTestType.IMAGE),
        prompt_formatter=lambda img_prompt: f"<|START_OF_TURN_TOKEN|><|USER_TOKEN|>{img_prompt}<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>",  # noqa: E501
        single_image_prompts=IMAGE_ASSETS.prompts(
            {
                "stop_sign": "<image>What's the content in the center of the image?",
                "cherry_blossom": "<image>What is the season?",
            }
        ),
        multi_image_prompt="<image><image>Describe the two images in detail.",
        max_model_len=4096,
        max_num_seqs=2,
        auto_cls=AutoModelForImageTextToText,
        vllm_runner_kwargs={"mm_processor_kwargs": {"crop_to_patches": True}},
    ),
    "aya_vision-multi_image": VLMTestInfo(
        models=["CohereForAI/aya-vision-8b"],
        test_type=(VLMTestType.MULTI_IMAGE),
        prompt_formatter=lambda img_prompt: f"<|START_OF_TURN_TOKEN|><|USER_TOKEN|>{img_prompt}<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>",  # noqa: E501
        single_image_prompts=IMAGE_ASSETS.prompts(
            {
                "stop_sign": "<image>What's the content in the center of the image?",
                "cherry_blossom": "<image>What is the season?",
            }
        ),
        multi_image_prompt="<image><image>Describe the two images in detail.",
        max_model_len=4096,
        max_num_seqs=2,
        auto_cls=AutoModelForImageTextToText,
        vllm_runner_kwargs={"mm_processor_kwargs": {"crop_to_patches": True}},
        marks=[large_gpu_mark(min_gb=32)],
    ),
    "blip2": VLMTestInfo(
        models=["Salesforce/blip2-opt-2.7b"],
        test_type=VLMTestType.IMAGE,
        prompt_formatter=lambda img_prompt: f"Question: {img_prompt} Answer:",
        img_idx_to_prompt=lambda idx: "",
        auto_cls=AutoModelForImageTextToText,
        vllm_output_post_proc=model_utils.blip2_vllm_to_hf_output,
        # FIXME: https://github.com/huggingface/transformers/pull/38510
        marks=[pytest.mark.skip("Model is broken")],
    ),
    "chameleon": VLMTestInfo(
        models=["facebook/chameleon-7b"],
        test_type=VLMTestType.IMAGE,
        prompt_formatter=lambda img_prompt: f"USER: {img_prompt}\nASSISTANT:",
        max_model_len=4096,
        max_num_seqs=2,
        auto_cls=AutoModelForImageTextToText,
        # For chameleon, we only compare the sequences
        vllm_output_post_proc=lambda vllm_output, model: vllm_output[:2],
        hf_output_post_proc=lambda hf_output, model: hf_output[:2],
        comparator=check_outputs_equal,
        max_tokens=8,
        dtype="bfloat16",
    ),
    "deepseek_vl_v2": VLMTestInfo(
        models=["Isotr0py/deepseek-vl2-tiny"],  # model repo using dynamic module
        test_type=(VLMTestType.IMAGE, VLMTestType.MULTI_IMAGE),
        prompt_formatter=lambda img_prompt: f"<|User|>: {img_prompt}\n\n<|Assistant|>:",  # noqa: E501
        max_model_len=4096,
        max_num_seqs=2,
        single_image_prompts=IMAGE_ASSETS.prompts(
            {
                "stop_sign": "<image>\nWhat's the content in the center of the image?",
                "cherry_blossom": "<image>\nPlease infer the season with reason in details.",  # noqa: E501
            }
        ),
        multi_image_prompt="image_1:<image>\nimage_2:<image>\nWhich image can we see the car and the tower?",  # noqa: E501
        patch_hf_runner=model_utils.deepseekvl2_patch_hf_runner,
        hf_output_post_proc=model_utils.deepseekvl2_trunc_hf_output,
        stop_str=["<｜end▁of▁sentence｜>", "<｜begin▁of▁sentence｜>"],
        image_size_factors=[(), (1.0,), (1.0, 1.0, 1.0), (0.1, 0.5, 1.0)],
    ),
    "fuyu": VLMTestInfo(
        models=["adept/fuyu-8b"],
        test_type=VLMTestType.IMAGE,
        prompt_formatter=lambda img_prompt: f"{img_prompt}\n",
        img_idx_to_prompt=lambda idx: "",
        max_model_len=2048,
        max_num_seqs=2,
        auto_cls=AutoModelForImageTextToText,
        use_tokenizer_eos=True,
        vllm_output_post_proc=model_utils.fuyu_vllm_to_hf_output,
        num_logprobs=10,
        image_size_factors=[(), (0.25,), (0.25, 0.25, 0.25), (0.25, 0.2, 0.15)],
        marks=[large_gpu_mark(min_gb=32)],
    ),
    "gemma3": VLMTestInfo(
        models=["google/gemma-3-4b-it"],
        test_type=(VLMTestType.IMAGE, VLMTestType.MULTI_IMAGE),
        prompt_formatter=lambda img_prompt: f"<bos><start_of_turn>user\n{img_prompt}<end_of_turn>\n<start_of_turn>model\n",  # noqa: E501
        single_image_prompts=IMAGE_ASSETS.prompts(
            {
                "stop_sign": "<start_of_image>What's the content in the center of the image?",  # noqa: E501
                "cherry_blossom": "<start_of_image>What is the season?",
            }
        ),
        multi_image_prompt="<start_of_image><start_of_image>Describe the two images in detail.",  # noqa: E501
        max_model_len=4096,
        max_num_seqs=2,
        auto_cls=AutoModelForImageTextToText,
        vllm_runner_kwargs={"mm_processor_kwargs": {"do_pan_and_scan": True}},
        patch_hf_runner=model_utils.gemma3_patch_hf_runner,
        num_logprobs=10,
    ),
    "glm4v": VLMTestInfo(
        models=["zai-org/glm-4v-9b"],
        test_type=VLMTestType.IMAGE,
        prompt_formatter=lambda img_prompt: f"<|user|>\n{img_prompt}<|assistant|>",
        single_image_prompts=IMAGE_ASSETS.prompts(
            {
                "stop_sign": "<|begin_of_image|><|endoftext|><|end_of_image|>What's the content in the center of the image?",  # noqa: E501
                "cherry_blossom": "<|begin_of_image|><|endoftext|><|end_of_image|>What is the season?",  # noqa: E501
            }
        ),
        max_model_len=2048,
        max_num_seqs=2,
        get_stop_token_ids=lambda tok: [151329, 151336, 151338],
        patch_hf_runner=model_utils.glm4v_patch_hf_runner,
        # The image embeddings match with HF but the outputs of the language
        # decoder are only consistent up to 2 decimal places.
        # So, we need to reduce the number of tokens for the test to pass.
        max_tokens=8,
        num_logprobs=10,
        marks=[large_gpu_mark(min_gb=32)],
    ),
    "glm4_1v": VLMTestInfo(
        models=["zai-org/GLM-4.1V-9B-Thinking"],
        test_type=(VLMTestType.IMAGE, VLMTestType.MULTI_IMAGE),
        prompt_formatter=lambda img_prompt: f"<|user|>\n{img_prompt}<|assistant|>",
        img_idx_to_prompt=lambda idx: "<|begin_of_image|><|image|><|end_of_image|>",
        video_idx_to_prompt=lambda idx: "<|begin_of_video|><|video|><|end_of_video|>",
        max_model_len=2048,
        max_num_seqs=2,
        get_stop_token_ids=lambda tok: [151329, 151336, 151338],
        num_logprobs=10,
        image_size_factors=[(), (0.25,), (0.25, 0.25, 0.25), (0.25, 0.2, 0.15)],
        auto_cls=AutoModelForImageTextToText,
        marks=[large_gpu_mark(min_gb=32)],
    ),
    "glm4_1v-video": VLMTestInfo(
        models=["zai-org/GLM-4.1V-9B-Thinking"],
        # GLM4.1V requires video metadata to be included in the input
        test_type=VLMTestType.CUSTOM_INPUTS,
        max_model_len=4096,
        max_num_seqs=2,
        auto_cls=AutoModelForImageTextToText,
        patch_hf_runner=model_utils.glm4_1v_patch_hf_runner,
        custom_test_opts=[
            CustomTestOptions(
                inputs=custom_inputs.video_with_metadata_glm4_1v(),
                limit_mm_per_prompt={"video": 1},
            )
        ],
        marks=[large_gpu_mark(min_gb=32)],
    ),
    "h2ovl": VLMTestInfo(
        models=[
            "h2oai/h2ovl-mississippi-800m",
            "h2oai/h2ovl-mississippi-2b",
        ],
        test_type=(VLMTestType.IMAGE, VLMTestType.MULTI_IMAGE),
        prompt_formatter=lambda img_prompt: f"<|prompt|>{img_prompt}<|end|><|answer|>",
        single_image_prompts=IMAGE_ASSETS.prompts(
            {
                "stop_sign": "<image>\nWhat's the content in the center of the image?",
                "cherry_blossom": "<image>\nWhat is the season?",
            }
        ),
        multi_image_prompt="Image-1: <image>\nImage-2: <image>\nDescribe the two images in short.",  # noqa: E501
        max_model_len=8192,
        use_tokenizer_eos=True,
        num_logprobs=10,
        patch_hf_runner=model_utils.h2ovl_patch_hf_runner,
    ),
    "idefics3": VLMTestInfo(
        models=["HuggingFaceTB/SmolVLM-256M-Instruct"],
        test_type=(VLMTestType.IMAGE, VLMTestType.MULTI_IMAGE),
        prompt_formatter=lambda img_prompt: f"<|begin_of_text|>User:{img_prompt}<end_of_utterance>\nAssistant:",  # noqa: E501
        img_idx_to_prompt=lambda idx: "<image>",
        max_model_len=8192,
        max_num_seqs=2,
        auto_cls=AutoModelForImageTextToText,
        hf_output_post_proc=model_utils.idefics3_trunc_hf_output,
    ),
    "intern_vl": VLMTestInfo(
        models=[
            "OpenGVLab/InternVL2-1B",
            "OpenGVLab/InternVL2-2B",
            # FIXME: Config cannot be loaded in transformers 4.52
            # "OpenGVLab/Mono-InternVL-2B",
        ],
        test_type=(VLMTestType.IMAGE, VLMTestType.MULTI_IMAGE),
        prompt_formatter=lambda img_prompt: f"<|im_start|>User\n{img_prompt}<|im_end|>\n<|im_start|>Assistant\n",  # noqa: E501
        single_image_prompts=IMAGE_ASSETS.prompts(
            {
                "stop_sign": "<image>\nWhat's the content in the center of the image?",
                "cherry_blossom": "<image>\nWhat is the season?",
            }
        ),
        multi_image_prompt="Image-1: <image>\nImage-2: <image>\nDescribe the two images in short.",  # noqa: E501
        max_model_len=4096,
        use_tokenizer_eos=True,
        patch_hf_runner=model_utils.internvl_patch_hf_runner,
    ),
    "intern_vl-video": VLMTestInfo(
        models=[
            "OpenGVLab/InternVL3-1B",
        ],
        test_type=VLMTestType.VIDEO,
        prompt_formatter=lambda img_prompt: f"<|im_start|>User\n{img_prompt}<|im_end|>\n<|im_start|>Assistant\n",  # noqa: E501
        video_idx_to_prompt=lambda idx: "<video>",
        max_model_len=8192,
        use_tokenizer_eos=True,
        patch_hf_runner=model_utils.internvl_patch_hf_runner,
    ),
    "intern_vl-hf": VLMTestInfo(
        models=["OpenGVLab/InternVL3-1B-hf"],
        test_type=(
            VLMTestType.IMAGE,
            VLMTestType.MULTI_IMAGE,
            VLMTestType.VIDEO,
        ),
        prompt_formatter=lambda img_prompt: f"<|im_start|>User\n{img_prompt}<|im_end|>\n<|im_start|>Assistant\n",  # noqa: E501
        img_idx_to_prompt=lambda idx: "<IMG_CONTEXT>",
        video_idx_to_prompt=lambda idx: "<video>",
        max_model_len=8192,
        use_tokenizer_eos=True,
        auto_cls=AutoModelForImageTextToText,
    ),
    "kimi_vl": VLMTestInfo(
        models=["moonshotai/Kimi-VL-A3B-Instruct"],
        test_type=(VLMTestType.IMAGE, VLMTestType.MULTI_IMAGE),
        prompt_formatter=lambda img_prompt: f"<|im_user|>user<|im_middle|>{img_prompt}<|im_end|><|im_assistant|>assistant<|im_middle|>",  # noqa: E501
        img_idx_to_prompt=lambda _: "<|media_start|>image<|media_content|><|media_pad|><|media_end|>",  # noqa: E501
        max_model_len=8192,
        max_num_seqs=2,
        dtype="bfloat16",
        tensor_parallel_size=1,
        vllm_output_post_proc=model_utils.kimiv_vl_vllm_to_hf_output,
        marks=[large_gpu_mark(min_gb=48)],
    ),
    "llama4": VLMTestInfo(
        models=["meta-llama/Llama-4-Scout-17B-16E-Instruct"],
        prompt_formatter=lambda img_prompt: f"<|begin_of_text|><|header_start|>user<|header_end|>\n\n{img_prompt}<|eot|><|header_start|>assistant<|header_end|>\n\n",  # noqa: E501
        img_idx_to_prompt=lambda _: "<|image|>",
        test_type=(VLMTestType.IMAGE, VLMTestType.MULTI_IMAGE),
        distributed_executor_backend="mp",
        image_size_factors=[(0.25, 0.5, 1.0)],
        hf_model_kwargs={"device_map": "auto"},
        max_model_len=8192,
        max_num_seqs=4,
        dtype="bfloat16",
        auto_cls=AutoModelForImageTextToText,
        tensor_parallel_size=4,
        marks=multi_gpu_marks(num_gpus=4),
    ),
    "llava_next": VLMTestInfo(
        models=["llava-hf/llava-v1.6-mistral-7b-hf"],
        test_type=(VLMTestType.IMAGE, VLMTestType.CUSTOM_INPUTS),
        prompt_formatter=lambda img_prompt: f"[INST] {img_prompt} [/INST]",
        max_model_len=10240,
        auto_cls=AutoModelForImageTextToText,
        vllm_output_post_proc=model_utils.llava_image_vllm_to_hf_output,
        custom_test_opts=[
            CustomTestOptions(
                inputs=custom_inputs.multi_image_multi_aspect_ratio_inputs(
                    formatter=lambda img_prompt: f"[INST] {img_prompt} [/INST]"
                ),
                limit_mm_per_prompt={"image": 4},
            )
        ],
    ),
    "llava_onevision": VLMTestInfo(
        models=["llava-hf/llava-onevision-qwen2-0.5b-ov-hf"],
        test_type=VLMTestType.CUSTOM_INPUTS,
        prompt_formatter=lambda vid_prompt: f"<|im_start|>user\n{vid_prompt}<|im_end|>\n<|im_start|>assistant\n",  # noqa: E501
        num_video_frames=16,
        max_model_len=16384,
        hf_model_kwargs=model_utils.llava_onevision_hf_model_kwargs(
            "llava-hf/llava-onevision-qwen2-0.5b-ov-hf"
        ),
        auto_cls=AutoModelForImageTextToText,
        vllm_output_post_proc=model_utils.llava_onevision_vllm_to_hf_output,
        custom_test_opts=[
            CustomTestOptions(
                inputs=custom_inputs.multi_video_multi_aspect_ratio_inputs(
                    formatter=lambda vid_prompt: f"<|im_start|>user\n{vid_prompt}<|im_end|>\n<|im_start|>assistant\n",  # noqa: E501
                ),
                limit_mm_per_prompt={"video": 4},
            )
        ],
    ),
    "llava_next_video": VLMTestInfo(
        models=["llava-hf/LLaVA-NeXT-Video-7B-hf"],
        test_type=VLMTestType.VIDEO,
        prompt_formatter=lambda vid_prompt: f"USER: {vid_prompt} ASSISTANT:",
        num_video_frames=16,
        max_model_len=4096,
        max_num_seqs=2,
        auto_cls=AutoModelForImageTextToText,
        vllm_output_post_proc=model_utils.llava_video_vllm_to_hf_output,
    ),
    "mantis": VLMTestInfo(
        models=["TIGER-Lab/Mantis-8B-siglip-llama3"],
        test_type=(VLMTestType.IMAGE, VLMTestType.MULTI_IMAGE),
        prompt_formatter=lambda img_prompt: f"<|start_header_id|>user<|end_header_id|>\n\n{img_prompt}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n",  # noqa: E501
        max_model_len=4096,
        get_stop_token_ids=lambda tok: [128009],
        auto_cls=AutoModelForImageTextToText,
        vllm_output_post_proc=model_utils.mantis_vllm_to_hf_output,
        patch_hf_runner=model_utils.mantis_patch_hf_runner,
    ),
    "minicpmv_25": VLMTestInfo(
        models=["openbmb/MiniCPM-Llama3-V-2_5"],
        test_type=VLMTestType.IMAGE,
        prompt_formatter=lambda img_prompt: f"<|begin_of_text|><|start_header_id|>user<|end_header_id|>\n\n{img_prompt}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n",  # noqa: E501
        img_idx_to_prompt=lambda idx: "(<image>./</image>)\n",
        max_model_len=4096,
        max_num_seqs=2,
        get_stop_token_ids=lambda tok: [tok.eos_id, tok.eot_id],
        hf_output_post_proc=model_utils.minicpmv_trunc_hf_output,
        patch_hf_runner=model_utils.minicpmv_25_patch_hf_runner,
        # FIXME: https://huggingface.co/openbmb/MiniCPM-V-2_6/discussions/55
        marks=[pytest.mark.skip("HF import fails")],
    ),
    "minicpmo_26": VLMTestInfo(
        models=["openbmb/MiniCPM-o-2_6"],
        test_type=(VLMTestType.IMAGE, VLMTestType.MULTI_IMAGE),
        prompt_formatter=lambda img_prompt: f"<|begin_of_text|><|start_header_id|>user<|end_header_id|>\n\n{img_prompt}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n",  # noqa: E501
        img_idx_to_prompt=lambda idx: "(<image>./</image>)\n",
        max_model_len=4096,
        max_num_seqs=2,
        get_stop_token_ids=lambda tok: tok.convert_tokens_to_ids(
            ["<|im_end|>", "<|endoftext|>"]
        ),
        hf_output_post_proc=model_utils.minicpmv_trunc_hf_output,
        patch_hf_runner=model_utils.minicpmo_26_patch_hf_runner,
        # FIXME: https://huggingface.co/openbmb/MiniCPM-o-2_6/discussions/49
        marks=[pytest.mark.skip("HF import fails")],
    ),
    "minicpmv_26": VLMTestInfo(
        models=["openbmb/MiniCPM-V-2_6"],
        test_type=(VLMTestType.IMAGE, VLMTestType.MULTI_IMAGE),
        prompt_formatter=lambda img_prompt: f"<|begin_of_text|><|start_header_id|>user<|end_header_id|>\n\n{img_prompt}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n",  # noqa: E501
        img_idx_to_prompt=lambda idx: "(<image>./</image>)\n",
        max_model_len=4096,
        max_num_seqs=2,
        get_stop_token_ids=lambda tok: tok.convert_tokens_to_ids(
            ["<|im_end|>", "<|endoftext|>"]
        ),
        hf_output_post_proc=model_utils.minicpmv_trunc_hf_output,
        patch_hf_runner=model_utils.minicpmv_26_patch_hf_runner,
    ),
    "minimax_vl_01": VLMTestInfo(
        models=["MiniMaxAI/MiniMax-VL-01"],
        prompt_formatter=lambda img_prompt: f"<beginning_of_sentence>user: {img_prompt} assistant:<end_of_sentence>",  # noqa: E501
        img_idx_to_prompt=lambda _: "<image>",
        test_type=(VLMTestType.IMAGE, VLMTestType.MULTI_IMAGE),
        max_model_len=8192,
        max_num_seqs=4,
        dtype="bfloat16",
        hf_output_post_proc=model_utils.minimax_vl_01_hf_output,
        patch_hf_runner=model_utils.minimax_vl_01_patch_hf_runner,
        auto_cls=AutoModelForImageTextToText,
        marks=[large_gpu_mark(min_gb=80)],
    ),
    "molmo": VLMTestInfo(
        models=["allenai/Molmo-7B-D-0924"],
        test_type=(VLMTestType.IMAGE, VLMTestType.MULTI_IMAGE),
        prompt_formatter=identity,
        max_model_len=4096,
        max_num_seqs=2,
        patch_hf_runner=model_utils.molmo_patch_hf_runner,
    ),
    "ovis1_6-gemma2": VLMTestInfo(
        models=["AIDC-AI/Ovis1.6-Gemma2-9B"],
        test_type=(VLMTestType.IMAGE, VLMTestType.MULTI_IMAGE),
        prompt_formatter=lambda img_prompt: f"<bos><start_of_turn>user\n{img_prompt}<end_of_turn>\n<start_of_turn>model\n",  # noqa: E501
        img_idx_to_prompt=lambda idx: "<image>\n",
        max_model_len=4096,
        max_num_seqs=2,
        dtype="half",
        # use sdpa mode for hf runner since ovis2 didn't work with flash_attn
        hf_model_kwargs={"llm_attn_implementation": "sdpa"},
        patch_hf_runner=model_utils.ovis_patch_hf_runner,
        marks=[large_gpu_mark(min_gb=32)],
    ),
    "ovis2": VLMTestInfo(
        models=["AIDC-AI/Ovis2-1B"],
        test_type=(VLMTestType.IMAGE, VLMTestType.MULTI_IMAGE),
        prompt_formatter=lambda img_prompt: f"<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n<|im_start|>user\n{img_prompt}<|im_end|>\n<|im_start|>assistant\n",  # noqa: E501
        img_idx_to_prompt=lambda idx: "<image>\n",
        max_model_len=4096,
        max_num_seqs=2,
        dtype="half",
        # use sdpa mode for hf runner since ovis2 didn't work with flash_attn
        hf_model_kwargs={"llm_attn_implementation": "sdpa"},
        patch_hf_runner=model_utils.ovis_patch_hf_runner,
    ),
    "ovis2_5": VLMTestInfo(
        models=["AIDC-AI/Ovis2.5-2B"],
        test_type=(VLMTestType.IMAGE, VLMTestType.MULTI_IMAGE, VLMTestType.VIDEO),
        prompt_formatter=lambda img_prompt: f"<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n<|im_start|>user\n{img_prompt}<|im_end|>\n<|im_start|>assistant\n",  # noqa: E501
        img_idx_to_prompt=lambda idx: "<image>\n",
        video_idx_to_prompt=lambda idx: "<video>\n",
        max_model_len=4096,
        max_num_seqs=2,
        dtype="half",
        num_logprobs=10,
        patch_hf_runner=model_utils.ovis2_5_patch_hf_runner,
        hf_model_kwargs={"revision": "refs/pr/5"},
    ),
    "paddleocr_vl": VLMTestInfo(
        models=["PaddlePaddle/PaddleOCR-VL"],
        test_type=(VLMTestType.IMAGE, VLMTestType.MULTI_IMAGE),
        prompt_formatter=lambda img_prompt: f"USER: {img_prompt}\nASSISTANT:",
        img_idx_to_prompt=lambda idx: (
            "<|IMAGE_START|><|IMAGE_PLACEHOLDER|><|IMAGE_END|>"
        ),
        multi_image_prompt=(
            "Image-1: <|IMAGE_START|><|IMAGE_PLACEHOLDER|><|IMAGE_END|>\n"
            "Image-2: <|IMAGE_START|><|IMAGE_PLACEHOLDER|><|IMAGE_END|>\n"
            "Describe these two images separately."
        ),
        max_model_len=8192,
        max_num_seqs=2,
        auto_cls=AutoModelForCausalLM,
        image_size_factors=[(), (0.25,)],
        marks=[
            pytest.mark.skipif(
                Version(TRANSFORMERS_VERSION) == Version("4.57.3"),
                reason="This model is broken in Transformers v4.57.3",
            )
        ],
    ),
    "phi3v": VLMTestInfo(
        models=["microsoft/Phi-3.5-vision-instruct"],
        test_type=(VLMTestType.IMAGE, VLMTestType.MULTI_IMAGE),
        prompt_formatter=lambda img_prompt: f"<|user|>\n{img_prompt}<|end|>\n<|assistant|>\n",  # noqa: E501
        img_idx_to_prompt=lambda idx: f"<|image_{idx}|>\n",
        max_model_len=4096,
        max_num_seqs=2,
        runner="generate",
        # use sdpa mode for hf runner since phi3v didn't work with flash_attn
        hf_model_kwargs={"_attn_implementation": "sdpa"},
        use_tokenizer_eos=True,
        vllm_output_post_proc=model_utils.phi3v_vllm_to_hf_output,
        num_logprobs=10,
    ),
    "pixtral_hf": VLMTestInfo(
        models=["nm-testing/pixtral-12b-FP8-dynamic"],
        test_type=(VLMTestType.IMAGE, VLMTestType.MULTI_IMAGE),
        prompt_formatter=lambda img_prompt: f"<s>[INST] {img_prompt} [/INST]",
        img_idx_to_prompt=lambda idx: "[IMG]",
        max_model_len=8192,
        max_num_seqs=2,
        auto_cls=AutoModelForImageTextToText,
        marks=[large_gpu_mark(min_gb=48)],
    ),
    "qwen_vl": VLMTestInfo(
        models=["Qwen/Qwen-VL"],
        test_type=(VLMTestType.IMAGE, VLMTestType.MULTI_IMAGE),
        prompt_formatter=identity,
        img_idx_to_prompt=lambda idx: f"Picture {idx}: <img></img>\n",
        max_model_len=1024,
        max_num_seqs=2,
        vllm_output_post_proc=model_utils.qwen_vllm_to_hf_output,
        prompt_path_encoder=model_utils.qwen_prompt_path_encoder,
    ),
    "qwen2_vl": VLMTestInfo(
        models=["Qwen/Qwen2-VL-2B-Instruct"],
        test_type=(VLMTestType.IMAGE, VLMTestType.MULTI_IMAGE, VLMTestType.VIDEO),
        prompt_formatter=lambda img_prompt: f"<|im_start|>User\n{img_prompt}<|im_end|>\n<|im_start|>assistant\n",  # noqa: E501
        img_idx_to_prompt=lambda idx: "<|vision_start|><|image_pad|><|vision_end|>",
        video_idx_to_prompt=lambda idx: "<|vision_start|><|video_pad|><|vision_end|>",
        multi_image_prompt="Picture 1: <vlm_image>\nPicture 2: <vlm_image>\nDescribe these two images with one paragraph respectively.",  # noqa: E501
        max_model_len=4096,
        max_num_seqs=2,
        auto_cls=AutoModelForImageTextToText,
        vllm_output_post_proc=model_utils.qwen2_vllm_to_hf_output,
        image_size_factors=[(), (0.25,), (0.25, 0.25, 0.25), (0.25, 0.2, 0.15)],
        marks=[pytest.mark.cpu_model],
    ),
    "skywork_r1v": VLMTestInfo(
        models=["Skywork/Skywork-R1V-38B"],
        test_type=(VLMTestType.IMAGE, VLMTestType.MULTI_IMAGE),
        prompt_formatter=lambda img_prompt: f"<｜begin▁of▁sentence｜><｜User｜>\n{img_prompt}<｜Assistant｜><think>\n",  # noqa: E501
        single_image_prompts=IMAGE_ASSETS.prompts(
            {
                "stop_sign": "<image>\nWhat's the content in the center of the image?",
                "cherry_blossom": "<image>\nWhat is the season?",
            }
        ),
        multi_image_prompt="<image>\n<image>\nDescribe the two images in short.",
        max_model_len=4096,
        use_tokenizer_eos=True,
        patch_hf_runner=model_utils.skyworkr1v_patch_hf_runner,
        marks=[large_gpu_mark(min_gb=80)],
    ),
    "smolvlm": VLMTestInfo(
        models=["HuggingFaceTB/SmolVLM2-2.2B-Instruct"],
        test_type=(VLMTestType.IMAGE, VLMTestType.MULTI_IMAGE),
        prompt_formatter=lambda img_prompt: f"<|im_start|>User:{img_prompt}<end_of_utterance>\nAssistant:",  # noqa: E501
        img_idx_to_prompt=lambda idx: "<image>",
        max_model_len=8192,
        max_num_seqs=2,
        auto_cls=AutoModelForImageTextToText,
        hf_output_post_proc=model_utils.smolvlm_trunc_hf_output,
        num_logprobs=10,
    ),
    "tarsier": VLMTestInfo(
        models=["omni-research/Tarsier-7b"],
        test_type=(VLMTestType.IMAGE, VLMTestType.MULTI_IMAGE),
        prompt_formatter=lambda img_prompt: f"USER: {img_prompt} ASSISTANT:",
        max_model_len=4096,
        max_num_seqs=2,
        auto_cls=AutoModelForImageTextToText,
        patch_hf_runner=model_utils.tarsier_patch_hf_runner,
    ),
    "tarsier2": VLMTestInfo(
        models=["omni-research/Tarsier2-Recap-7b"],
        test_type=(
            VLMTestType.IMAGE,
            VLMTestType.MULTI_IMAGE,
            VLMTestType.VIDEO,
        ),
        prompt_formatter=lambda img_prompt: f"<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n<|im_start|>user\n{img_prompt}<|im_end|>\n<|im_start|>assistant\n",  # noqa: E501
        img_idx_to_prompt=lambda idx: "<|vision_start|><|image_pad|><|vision_end|>",
        video_idx_to_prompt=lambda idx: "<|vision_start|><|video_pad|><|vision_end|>",
        max_model_len=4096,
        max_num_seqs=2,
        auto_cls=AutoModelForImageTextToText,
        image_size_factors=[(), (0.25,), (0.25, 0.25, 0.25), (0.25, 0.2, 0.15)],
        marks=[pytest.mark.skip("Model initialization hangs")],
    ),
    ### Tensor parallel / multi-gpu broadcast tests
    "chameleon-broadcast": VLMTestInfo(
        models=["facebook/chameleon-7b"],
        prompt_formatter=lambda img_prompt: f"USER: {img_prompt}\nASSISTANT:",
        max_model_len=4096,
        auto_cls=AutoModelForImageTextToText,
        vllm_output_post_proc=lambda vllm_output, model: vllm_output[:2],
        hf_output_post_proc=lambda hf_output, model: hf_output[:2],
        comparator=check_outputs_equal,
        marks=multi_gpu_marks(num_gpus=2),
        **COMMON_BROADCAST_SETTINGS,  # type: ignore
    ),
    "llava-broadcast": VLMTestInfo(
        models=["llava-hf/llava-1.5-7b-hf"],
        prompt_formatter=lambda img_prompt: f"USER: {img_prompt}\nASSISTANT:",
        max_model_len=4096,
        auto_cls=AutoModelForImageTextToText,
        vllm_output_post_proc=model_utils.llava_image_vllm_to_hf_output,
        marks=multi_gpu_marks(num_gpus=2),
        **COMMON_BROADCAST_SETTINGS,  # type: ignore
    ),
    "llava_next-broadcast": VLMTestInfo(
        models=["llava-hf/llava-v1.6-mistral-7b-hf"],
        prompt_formatter=lambda img_prompt: f"[INST] {img_prompt} [/INST]",
        max_model_len=10240,
        auto_cls=AutoModelForImageTextToText,
        vllm_output_post_proc=model_utils.llava_image_vllm_to_hf_output,
        marks=multi_gpu_marks(num_gpus=2),
        **COMMON_BROADCAST_SETTINGS,  # type: ignore
    ),
    ### Custom input edge-cases for specific models
    "intern_vl-diff-patches": VLMTestInfo(
        models=["OpenGVLab/InternVL2-2B"],
        prompt_formatter=lambda img_prompt: f"<|im_start|>User\n{img_prompt}<|im_end|>\n<|im_start|>Assistant\n",  # noqa: E501
        test_type=VLMTestType.CUSTOM_INPUTS,
        max_model_len=4096,
        use_tokenizer_eos=True,
        patch_hf_runner=model_utils.internvl_patch_hf_runner,
        custom_test_opts=[
            CustomTestOptions(
                inputs=inp,
                limit_mm_per_prompt={"image": 2},
            )
            for inp in custom_inputs.different_patch_input_cases_internvl()
        ],
    ),
    "llava_onevision-multiple-images": VLMTestInfo(
        models=["llava-hf/llava-onevision-qwen2-0.5b-ov-hf"],
        test_type=VLMTestType.CUSTOM_INPUTS,
        max_model_len=16384,
        max_num_seqs=2,
        auto_cls=AutoModelForImageTextToText,
        hf_model_kwargs=model_utils.llava_onevision_hf_model_kwargs(
            "llava-hf/llava-onevision-qwen2-0.5b-ov-hf"
        ),
        vllm_output_post_proc=model_utils.llava_onevision_vllm_to_hf_output,
        custom_test_opts=[
            CustomTestOptions(
                inputs=custom_inputs.multi_image_multi_aspect_ratio_inputs(
                    formatter=lambda vid_prompt: f"<|im_start|>user\n{vid_prompt}<|im_end|>\n<|im_start|>assistant\n",  # noqa: E501
                ),
                limit_mm_per_prompt={"image": 4},
            )
        ],
        marks=[
            pytest.mark.skipif(
                Version(TRANSFORMERS_VERSION) == Version("4.57.1"),
                reason="This model is broken in Transformers v4.57.1",
            )
        ],
    ),
    # regression test for https://github.com/vllm-project/vllm/issues/15122
    "qwen2_5_vl-windows-attention": VLMTestInfo(
        models=["Qwen/Qwen2.5-VL-3B-Instruct"],
        test_type=VLMTestType.CUSTOM_INPUTS,
        max_model_len=4096,
        max_num_seqs=2,
        auto_cls=AutoModelForImageTextToText,
        vllm_output_post_proc=model_utils.qwen2_vllm_to_hf_output,
        custom_test_opts=[
            CustomTestOptions(
                inputs=custom_inputs.windows_attention_image_qwen2_5_vl(),
                limit_mm_per_prompt={"image": 1},
            )
        ],
    ),
}
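
# Each entry above is expanded by get_parametrized_options(...) in the
# parametrized test wrappers at the bottom of this file into individual
# (model_type, test_case) combinations (see vlm_utils.case_filtering for the
# exact expansion logic).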


def _mark_splits(
    test_settings: dict[str, VLMTestInfo],
    *,
    num_groups: int,
) -> dict[str, VLMTestInfo]:
    """Tag each test setting with a ``pytest.mark.split(group=i)`` mark so that
    the models can be sharded into ``num_groups`` roughly equal groups.
    """
    name_by_test_info_id = {id(v): k for k, v in test_settings.items()}

    test_infos_by_model = defaultdict[str, list[VLMTestInfo]](list)
    for info in test_settings.values():
        for model in info.models:
            test_infos_by_model[model].append(info)

    models = sorted(test_infos_by_model.keys())
    split_size = math.ceil(len(models) / num_groups)

    new_test_settings = dict[str, VLMTestInfo]()
    for i in range(num_groups):
        models_in_group = models[i * split_size : (i + 1) * split_size]
        for model in models_in_group:
            for info in test_infos_by_model[model]:
                new_marks = (info.marks or []) + [pytest.mark.split(group=i)]
                new_info = info._replace(marks=new_marks)
                new_test_settings[name_by_test_info_id[id(info)]] = new_info

    missing_keys = test_settings.keys() - new_test_settings.keys()
    assert not missing_keys, f"Missing keys: {missing_keys}"
    return new_test_settings


VLM_TEST_SETTINGS = _mark_splits(VLM_TEST_SETTINGS, num_groups=2)
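
# Illustrative sketch of the split above (the model names here are
# hypothetical): with num_groups=2 and three models sorted as ["a", "b", "c"],
# split_size is math.ceil(3 / 2) == 2, so the test infos for "a" and "b" are
# tagged with pytest.mark.split(group=0) and those for "c" with
# pytest.mark.split(group=1), which lets a CI shard select a subset of models
# by filtering on that mark.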


### Test wrappers
# Wrappers around the core test running func for:
# - single image
# - multi-image
# - image embeddings
# - video
# - audio
# - custom inputs
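#
# Each wrapper below also has a "*_heavy" twin further down in this file; the
# heavy variants collect the cases registered with
# create_new_process_for_each_test=True and (for most test types) run under
# the @create_new_process_for_each_test() decorator so that each case gets a
# fresh process.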

@pytest.mark.parametrize(
    "model_type,test_case",
    get_parametrized_options(
        VLM_TEST_SETTINGS,
        test_type=VLMTestType.IMAGE,
        create_new_process_for_each_test=False,
    ),
)
def test_single_image_models(
    tmp_path: PosixPath,
    model_type: str,
    test_case: ExpandableVLMTestArgs,
    hf_runner: type[HfRunner],
    vllm_runner: type[VllmRunner],
    image_assets: ImageTestAssets,
):
    model_test_info = VLM_TEST_SETTINGS[model_type]
    runners.run_single_image_test(
        tmp_path=tmp_path,
        model_test_info=model_test_info,
        test_case=test_case,
        hf_runner=hf_runner,
        vllm_runner=vllm_runner,
        image_assets=image_assets,
    )


@pytest.mark.parametrize(
    "model_type,test_case",
    get_parametrized_options(
        VLM_TEST_SETTINGS,
        test_type=VLMTestType.MULTI_IMAGE,
        create_new_process_for_each_test=False,
    ),
)
def test_multi_image_models(
    tmp_path: PosixPath,
    model_type: str,
    test_case: ExpandableVLMTestArgs,
    hf_runner: type[HfRunner],
    vllm_runner: type[VllmRunner],
    image_assets: ImageTestAssets,
):
    model_test_info = VLM_TEST_SETTINGS[model_type]
    runners.run_multi_image_test(
        tmp_path=tmp_path,
        model_test_info=model_test_info,
        test_case=test_case,
        hf_runner=hf_runner,
        vllm_runner=vllm_runner,
        image_assets=image_assets,
    )


@pytest.mark.parametrize(
    "model_type,test_case",
    get_parametrized_options(
        VLM_TEST_SETTINGS,
        test_type=VLMTestType.EMBEDDING,
        create_new_process_for_each_test=False,
    ),
)
def test_image_embedding_models(
    model_type: str,
    test_case: ExpandableVLMTestArgs,
    hf_runner: type[HfRunner],
    vllm_runner: type[VllmRunner],
    image_assets: ImageTestAssets,
):
    model_test_info = VLM_TEST_SETTINGS[model_type]
    runners.run_embedding_test(
        model_test_info=model_test_info,
        test_case=test_case,
        hf_runner=hf_runner,
        vllm_runner=vllm_runner,
        image_assets=image_assets,
    )


@pytest.mark.parametrize(
    "model_type,test_case",
    get_parametrized_options(
        VLM_TEST_SETTINGS,
        test_type=VLMTestType.VIDEO,
        create_new_process_for_each_test=False,
    ),
)
def test_video_models(
    model_type: str,
    test_case: ExpandableVLMTestArgs,
    hf_runner: type[HfRunner],
    vllm_runner: type[VllmRunner],
    video_assets: VideoTestAssets,
):
    model_test_info = VLM_TEST_SETTINGS[model_type]
    runners.run_video_test(
        model_test_info=model_test_info,
        test_case=test_case,
        hf_runner=hf_runner,
        vllm_runner=vllm_runner,
        video_assets=video_assets,
    )


@pytest.mark.parametrize(
    "model_type,test_case",
    get_parametrized_options(
        VLM_TEST_SETTINGS,
        test_type=VLMTestType.AUDIO,
        create_new_process_for_each_test=False,
    ),
)
def test_audio_models(
    model_type: str,
    test_case: ExpandableVLMTestArgs,
    hf_runner: type[HfRunner],
    vllm_runner: type[VllmRunner],
    audio_assets: AudioTestAssets,
):
    model_test_info = VLM_TEST_SETTINGS[model_type]
    runners.run_audio_test(
        model_test_info=model_test_info,
        test_case=test_case,
        hf_runner=hf_runner,
        vllm_runner=vllm_runner,
        audio_assets=audio_assets,
    )


@pytest.mark.parametrize(
    "model_type,test_case",
    get_parametrized_options(
        VLM_TEST_SETTINGS,
        test_type=VLMTestType.CUSTOM_INPUTS,
        create_new_process_for_each_test=False,
    ),
)
def test_custom_inputs_models(
    model_type: str,
    test_case: ExpandableVLMTestArgs,
    hf_runner: type[HfRunner],
    vllm_runner: type[VllmRunner],
):
    model_test_info = VLM_TEST_SETTINGS[model_type]
    runners.run_custom_inputs_test(
        model_test_info=model_test_info,
        test_case=test_case,
        hf_runner=hf_runner,
        vllm_runner=vllm_runner,
    )


#### Tests filtering for things running each test as a new process
@pytest.mark.parametrize(
    "model_type,test_case",
    get_parametrized_options(
        VLM_TEST_SETTINGS,
        test_type=VLMTestType.IMAGE,
        create_new_process_for_each_test=True,
    ),
)
@create_new_process_for_each_test()
def test_single_image_models_heavy(
    tmp_path: PosixPath,
    model_type: str,
    test_case: ExpandableVLMTestArgs,
    hf_runner: type[HfRunner],
    vllm_runner: type[VllmRunner],
    image_assets: ImageTestAssets,
):
    model_test_info = VLM_TEST_SETTINGS[model_type]
    runners.run_single_image_test(
        tmp_path=tmp_path,
        model_test_info=model_test_info,
        test_case=test_case,
        hf_runner=hf_runner,
        vllm_runner=vllm_runner,
        image_assets=image_assets,
    )


@pytest.mark.parametrize(
    "model_type,test_case",
    get_parametrized_options(
        VLM_TEST_SETTINGS,
        test_type=VLMTestType.MULTI_IMAGE,
        create_new_process_for_each_test=True,
    ),
)
@create_new_process_for_each_test()
def test_multi_image_models_heavy(
    tmp_path: PosixPath,
    model_type: str,
    test_case: ExpandableVLMTestArgs,
    hf_runner: type[HfRunner],
    vllm_runner: type[VllmRunner],
    image_assets: ImageTestAssets,
):
    model_test_info = VLM_TEST_SETTINGS[model_type]
    runners.run_multi_image_test(
        tmp_path=tmp_path,
        model_test_info=model_test_info,
        test_case=test_case,
        hf_runner=hf_runner,
        vllm_runner=vllm_runner,
        image_assets=image_assets,
    )


@pytest.mark.parametrize(
    "model_type,test_case",
    get_parametrized_options(
        VLM_TEST_SETTINGS,
        test_type=VLMTestType.EMBEDDING,
        create_new_process_for_each_test=True,
    ),
)
@create_new_process_for_each_test()
def test_image_embedding_models_heavy(
    model_type: str,
    test_case: ExpandableVLMTestArgs,
    hf_runner: type[HfRunner],
    vllm_runner: type[VllmRunner],
    image_assets: ImageTestAssets,
):
    model_test_info = VLM_TEST_SETTINGS[model_type]
    runners.run_embedding_test(
        model_test_info=model_test_info,
        test_case=test_case,
        hf_runner=hf_runner,
        vllm_runner=vllm_runner,
        image_assets=image_assets,
    )


@pytest.mark.parametrize(
    "model_type,test_case",
    get_parametrized_options(
        VLM_TEST_SETTINGS,
        test_type=VLMTestType.VIDEO,
        create_new_process_for_each_test=True,
    ),
)
def test_video_models_heavy(
    model_type: str,
    test_case: ExpandableVLMTestArgs,
    hf_runner: type[HfRunner],
    vllm_runner: type[VllmRunner],
    video_assets: VideoTestAssets,
):
    model_test_info = VLM_TEST_SETTINGS[model_type]
    runners.run_video_test(
        model_test_info=model_test_info,
        test_case=test_case,
        hf_runner=hf_runner,
        vllm_runner=vllm_runner,
        video_assets=video_assets,
    )


@pytest.mark.parametrize(
    "model_type,test_case",
    get_parametrized_options(
        VLM_TEST_SETTINGS,
        test_type=VLMTestType.AUDIO,
        create_new_process_for_each_test=True,
    ),
)
def test_audio_models_heavy(
    model_type: str,
    test_case: ExpandableVLMTestArgs,
    hf_runner: type[HfRunner],
    vllm_runner: type[VllmRunner],
    audio_assets: AudioTestAssets,
):
    model_test_info = VLM_TEST_SETTINGS[model_type]
    runners.run_audio_test(
        model_test_info=model_test_info,
        test_case=test_case,
        hf_runner=hf_runner,
        vllm_runner=vllm_runner,
        audio_assets=audio_assets,
    )


@pytest.mark.parametrize(
    "model_type,test_case",
    get_parametrized_options(
        VLM_TEST_SETTINGS,
        test_type=VLMTestType.CUSTOM_INPUTS,
        create_new_process_for_each_test=True,
    ),
)
@create_new_process_for_each_test()
def test_custom_inputs_models_heavy(
    model_type: str,
    test_case: ExpandableVLMTestArgs,
    hf_runner: type[HfRunner],
    vllm_runner: type[VllmRunner],
):
    model_test_info = VLM_TEST_SETTINGS[model_type]
    runners.run_custom_inputs_test(
        model_test_info=model_test_info,
        test_case=test_case,
        hf_runner=hf_runner,
        vllm_runner=vllm_runner,
    )