# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project

import pytest

from vllm.assets.image import ImageAsset
from vllm.assets.video import VideoAsset
from vllm.config import CacheConfig, ModelConfig, VllmConfig
from vllm.multimodal import MultiModalUUIDDict
from vllm.sampling_params import SamplingParams
from vllm.v1.engine.input_processor import InputProcessor

# Media assets shared by the tests in this module; resolved once at import.
cherry_pil_image = ImageAsset("cherry_blossom").pil_image
stop_pil_image = ImageAsset("stop_sign").pil_image
baby_reading_np_ndarrays = VideoAsset("baby_reading").np_ndarrays


def _build_input_processor(
    *, mm_cache_gb: float = 4.0, enable_prefix_caching: bool = True
) -> InputProcessor:
    """Create the ``InputProcessor`` used by the tests in this module.

    The model config targets ``Qwen/Qwen2.5-VL-3B-Instruct`` with tokenizer
    initialization skipped and ``max_model_len`` set to 128.

    Args:
        mm_cache_gb: Forwarded to ``ModelConfig`` as
            ``mm_processor_cache_gb``.
        enable_prefix_caching: Forwarded to ``CacheConfig``.

    Returns:
        An ``InputProcessor`` constructed from the resulting ``VllmConfig``.
    """
    model_config = ModelConfig(
        model="Qwen/Qwen2.5-VL-3B-Instruct",
        skip_tokenizer_init=True,
        max_model_len=128,
        mm_processor_cache_gb=mm_cache_gb,
    )

    vllm_config = VllmConfig(
        model_config=model_config,
        cache_config=CacheConfig(enable_prefix_caching=enable_prefix_caching),
    )

    return InputProcessor(vllm_config)


def test_multi_modal_uuids_length_mismatch_raises():
    """Expect a ``ValueError`` when two images come with only one uuid."""
    input_processor = _build_input_processor()

    prompt = {
        "prompt": "USER: \nDescribe\nASSISTANT:",
        "multi_modal_data": {"image": [cherry_pil_image, stop_pil_image]},
        # Mismatch: 2 items but only 1 uuid provided
        "multi_modal_uuids": {"image": ["hash_cherry"]},
    }

    with pytest.raises(ValueError, match="must have same length as"):
        input_processor.process_inputs(
            request_id="req-1",
            prompt=prompt,  # type: ignore[arg-type]
            params=SamplingParams(),
        )


# NOTE(review): the next test is cut off in the reviewed text, in the middle
# of its prompt string literal. The visible fragment is kept verbatim below,
# commented out so that the module parses; restore the full definition from
# the complete file instead of reconstructing it by guesswork.
#
# def test_multi_modal_uuids_missing_modality_raises():
#     input_processor = _build_input_processor()
#     prompt = {
#         "prompt": "USER: