diff --git a/tests/entrypoints/llm/test_chat.py b/tests/entrypoints/llm/test_chat.py
index dc72ffa0e..ba3b80320 100644
--- a/tests/entrypoints/llm/test_chat.py
+++ b/tests/entrypoints/llm/test_chat.py
@@ -195,18 +195,15 @@ def test_chat_batch_failure_cleanup(llm_for_failure_test):
     valid_msg = [{"role": "user", "content": "Hello"}]
     long_text = "This is a very long text to test the error " * 50
     invalid_msg = [{"role": "user", "content": long_text}]
-    batch_1 = [
-        valid_msg,
-        valid_msg,
-        invalid_msg,
-    ]
-    batch_2 = [
-        valid_msg,
-        valid_msg,
-    ]
+
+    batch_1 = [valid_msg, valid_msg, invalid_msg]
+    batch_2 = [valid_msg, valid_msg]
     sampling_params = SamplingParams(temperature=0, max_tokens=10)
+
     with pytest.raises(ValueError, match="context length is only"):
         llm.chat(batch_1, sampling_params=sampling_params)
+    assert llm.llm_engine.get_num_unfinished_requests() == 0
+
     outputs_2 = llm.chat(batch_2, sampling_params=sampling_params)
     assert len(outputs_2) == len(batch_2)
     assert llm.llm_engine.get_num_unfinished_requests() == 0
diff --git a/tests/models/multimodal/processing/test_common.py b/tests/models/multimodal/processing/test_common.py
index a085d6e2f..7f18d5b03 100644
--- a/tests/models/multimodal/processing/test_common.py
+++ b/tests/models/multimodal/processing/test_common.py
@@ -489,8 +489,9 @@ def _assert_inputs_equal(
     if ignore_mm_keys is None:
         ignore_mm_keys = set()
 
-    a_rest = {k: v for k, v in a.items() if k != "mm_kwargs"}
-    b_rest = {k: v for k, v in b.items() if k != "mm_kwargs"}
+    ignore_prompt_keys = ("prompt", "mm_kwargs")
+    a_rest = {k: v for k, v in a.items() if k not in ignore_prompt_keys}
+    b_rest = {k: v for k, v in b.items() if k not in ignore_prompt_keys}
 
     assert a_rest == b_rest, msg
 
diff --git a/tests/renderers/test_process_multi_modal_uuids.py b/tests/renderers/test_process_multi_modal_uuids.py
new file mode 100644
index 000000000..8d9fea28b
--- /dev/null
+++ b/tests/renderers/test_process_multi_modal_uuids.py
@@ -0,0 +1,165 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+import pytest
+
+from vllm.assets.image import ImageAsset
+from vllm.assets.video import VideoAsset
+from vllm.config import CacheConfig, ModelConfig, VllmConfig
+from vllm.renderers.hf import HfRenderer
+from vllm.tokenizers.registry import tokenizer_args_from_config
+
+cherry_pil_image = ImageAsset("cherry_blossom").pil_image
+stop_pil_image = ImageAsset("stop_sign").pil_image
+baby_reading_np_ndarrays = VideoAsset("baby_reading").np_ndarrays
+
+
+def _build_renderer(
+    *, mm_cache_gb: float = 4.0, enable_prefix_caching: bool = True
+) -> HfRenderer:
+    model_config = ModelConfig(
+        model="Qwen/Qwen2.5-VL-3B-Instruct",
+        max_model_len=128,
+        mm_processor_cache_gb=mm_cache_gb,
+    )
+
+    vllm_config = VllmConfig(
+        model_config=model_config,
+        cache_config=CacheConfig(enable_prefix_caching=enable_prefix_caching),
+    )
+
+    _, tokenizer_name, _, kwargs = tokenizer_args_from_config(model_config)
+
+    return HfRenderer.from_config(
+        vllm_config,
+        tokenizer_kwargs={**kwargs, "tokenizer_name": tokenizer_name},
+    )
+
+
+def test_multi_modal_uuids_length_mismatch_raises():
+    renderer = _build_renderer()
+
+    mm_data = {"image": [cherry_pil_image, stop_pil_image]}
+
+    # Mismatch: 2 items but only 1 uuid provided
+    mm_uuids = {"image": ["hash_cherry"]}
+
+    mm_processor = renderer.get_mm_processor()
+    mm_items = mm_processor.info.parse_mm_data(mm_data)
+
+    with pytest.raises(ValueError, match="must have same length as"):
+        renderer._process_mm_uuids(mm_data, mm_items, mm_uuids, "req-1")
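+
+# NOTE (editorial): per-modality UUID lists must match the number of parsed
+# items one-to-one. Individual entries may be None (see the pass-through
+# test below); only a short or missing list for a supplied modality raises.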
+
+
+def test_multi_modal_uuids_missing_modality_raises():
+    renderer = _build_renderer()
+
+    mm_data = {
+        "image": [cherry_pil_image],
+        "video": None,
+    }
+
+    # Only image UUIDs are provided; the missing video entry should raise
+    mm_uuids = {"image": ["hash_cherry"]}
+
+    mm_processor = renderer.get_mm_processor()
+    mm_items = mm_processor.info.parse_mm_data(mm_data)
+
+    with pytest.raises(ValueError, match="is empty but .* is missing"):
+        renderer._process_mm_uuids(mm_data, mm_items, mm_uuids, "req-2")
+
+
+@pytest.mark.parametrize(
+    "mm_cache_gb, enable_prefix_caching",
+    [
+        (4.0, True),  # default behavior
+        (4.0, False),  # prefix caching disabled
+        (0.0, True),  # processor cache disabled
+    ],
+)
+def test_multi_modal_uuids_accepts_none_and_passes_through(
+    monkeypatch, mm_cache_gb: float, enable_prefix_caching: bool
+):
+    renderer = _build_renderer(
+        mm_cache_gb=mm_cache_gb,
+        enable_prefix_caching=enable_prefix_caching,
+    )
+
+    mm_data = {
+        "image": [cherry_pil_image, stop_pil_image],
+        "video": baby_reading_np_ndarrays,
+    }
+
+    # Use a consistent two-image scenario across all configurations
+    mm_uuids = {"image": [None, "hash_stop"], "video": None}
+
+    mm_processor = renderer.get_mm_processor()
+    mm_items = mm_processor.info.parse_mm_data(mm_data)
+    processed_mm_uuids = renderer._process_mm_uuids(
+        mm_data, mm_items, mm_uuids, "req-3"
+    )
+
+    assert processed_mm_uuids == mm_uuids
+
+
+@pytest.mark.parametrize(
+    "mm_cache_gb, enable_prefix_caching",
+    [
+        (4.0, True),  # default behavior
+        (4.0, False),  # prefix caching disabled
+        (0.0, True),  # processor cache disabled
+    ],
+)
+def test_multi_modal_uuids_accepts_empty(
+    monkeypatch, mm_cache_gb: float, enable_prefix_caching: bool
+):
+    renderer = _build_renderer(
+        mm_cache_gb=mm_cache_gb,
+        enable_prefix_caching=enable_prefix_caching,
+    )
+
+    # While None means a cached multi-modal input that requires a UUID,
+    # an empty list means no multi-modal input at all.
+    mm_data = {"image": [], "video": []}  # type: ignore[var-annotated]
+    mm_uuids = {"image": [], "video": None}  # type: ignore[var-annotated]
+
+    mm_processor = renderer.get_mm_processor()
+    mm_items = mm_processor.info.parse_mm_data(mm_data)
+    processed_mm_uuids = renderer._process_mm_uuids(
+        mm_data, mm_items, mm_uuids, "req-4"
+    )
+
+    assert processed_mm_uuids == mm_uuids
+
+
+def test_multi_modal_uuids_ignored_when_caching_disabled(monkeypatch):
+    # When the processor cache is 0 and prefix caching is disabled, the
+    # processor builds overrides from the request ID instead of user UUIDs.
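+    # (Editorial note: the exact override format is an implementation
+    # detail; the assertions below only check the request-id prefix and
+    # the per-item index suffix, e.g. an image override of the shape
+    # "req-42-image-...-0".)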
+
+    renderer = _build_renderer(mm_cache_gb=0.0, enable_prefix_caching=False)
+
+    request_id = "req-42"
+    mm_data = {
+        "image": [cherry_pil_image, stop_pil_image],
+        "video": baby_reading_np_ndarrays,
+    }
+    mm_uuids = {"image": ["hash_cherry", "hash_stop"], "video": ["hash_video"]}
+
+    mm_processor = renderer.get_mm_processor()
+    mm_items = mm_processor.info.parse_mm_data(mm_data)
+    processed_mm_uuids = renderer._process_mm_uuids(
+        mm_data, mm_items, mm_uuids, request_id
+    )
+
+    # Expect request-id-based overrides to be returned for every modality
+    assert set(processed_mm_uuids.keys()) == {"image", "video"}
+    assert len(processed_mm_uuids["image"]) == 2
+    assert len(processed_mm_uuids["video"]) == 1
+    assert processed_mm_uuids["image"][0].startswith(
+        f"{request_id}-image-"
+    ) and processed_mm_uuids["image"][0].endswith("-0")
+    assert processed_mm_uuids["image"][1].startswith(
+        f"{request_id}-image-"
+    ) and processed_mm_uuids["image"][1].endswith("-1")
+    assert processed_mm_uuids["video"][0].startswith(
+        f"{request_id}-video-"
+    ) and processed_mm_uuids["video"][0].endswith("-0")
diff --git a/tests/samplers/test_beam_search.py b/tests/samplers/test_beam_search.py
index 830332298..b2df9af6f 100644
--- a/tests/samplers/test_beam_search.py
+++ b/tests/samplers/test_beam_search.py
@@ -20,7 +20,6 @@ MM_BEAM_WIDTHS = [2]
 MODELS = ["TinyLlama/TinyLlama-1.1B-Chat-v1.0"]
 
 
-@pytest.mark.skip_v1  # V1 engine does not yet support beam search
 @pytest.mark.parametrize("model", MODELS)
 @pytest.mark.parametrize("dtype", ["half"])
 @pytest.mark.parametrize("max_tokens", MAX_TOKENS)
@@ -62,7 +61,6 @@ def test_beam_search_single_input(
     )
 
 
-@pytest.mark.skip_v1  # V1 engine does not yet support beam search
 @pytest.mark.parametrize("model", MODELS)
 @pytest.mark.parametrize("dtype", ["half"])
 @pytest.mark.parametrize("max_tokens", MAX_TOKENS)
diff --git a/tests/v1/engine/test_process_multi_modal_uuids.py b/tests/v1/engine/test_process_multi_modal_uuids.py
deleted file mode 100644
index 4170de173..000000000
--- a/tests/v1/engine/test_process_multi_modal_uuids.py
+++ /dev/null
@@ -1,174 +0,0 @@
-# SPDX-License-Identifier: Apache-2.0
-# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-
-import pytest
-
-from vllm.assets.image import ImageAsset
-from vllm.assets.video import VideoAsset
-from vllm.config import CacheConfig, ModelConfig, VllmConfig
-from vllm.multimodal import MultiModalUUIDDict
-from vllm.sampling_params import SamplingParams
-from vllm.v1.engine.input_processor import InputProcessor
-
-cherry_pil_image = ImageAsset("cherry_blossom").pil_image
-stop_pil_image = ImageAsset("stop_sign").pil_image
-baby_reading_np_ndarrays = VideoAsset("baby_reading").np_ndarrays
-
-
-def _build_input_processor(
-    *, mm_cache_gb: float = 4.0, enable_prefix_caching: bool = True
-) -> InputProcessor:
-    model_config = ModelConfig(
-        model="Qwen/Qwen2.5-VL-3B-Instruct",
-        max_model_len=128,
-        mm_processor_cache_gb=mm_cache_gb,
-    )
-
-    vllm_config = VllmConfig(
-        model_config=model_config,
-        cache_config=CacheConfig(enable_prefix_caching=enable_prefix_caching),
-    )
-
-    return InputProcessor(vllm_config)
-
-
-def test_multi_modal_uuids_length_mismatch_raises():
-    input_processor = _build_input_processor()
-
-    prompt = {
-        "prompt": "USER: \nDescribe\nASSISTANT:",
-        "multi_modal_data": {"image": [cherry_pil_image, stop_pil_image]},
-        # Mismatch: 2 items but only 1 uuid provided
-        "multi_modal_uuids": {"image": ["hash_cherry"]},
-    }
-
-    with pytest.raises(ValueError, match="must have same length as"):
-        input_processor.process_inputs(
-            request_id="req-1",
-            prompt=prompt,  # type: ignore[arg-type]
-            params=SamplingParams(),
-        )
-
-
-def test_multi_modal_uuids_missing_modality_raises():
-    input_processor = _build_input_processor()
-
-    prompt = {
-        "prompt": "USER: