[V0 Deprecation] Enable the remaining multimodal tests in V1 (#25307)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
2025-09-21 01:50:58 +08:00
parent d88918e4c2
commit bef180f009
8 changed files with 195 additions and 214 deletions
--- a/tests/models/multimodal/generation/test_pixtral.py
+++ b/tests/models/multimodal/generation/test_pixtral.py
@@ -12,13 +12,12 @@ from mistral_common.tokens.tokenizers.mistral import MistralTokenizer
 from mistral_common.tokens.tokenizers.multimodal import image_from_chunk
 from transformers import AutoProcessor

-from vllm import RequestOutput, SamplingParams, TextPrompt, TokensPrompt
+from vllm import SamplingParams, TextPrompt, TokensPrompt
 from vllm.multimodal import MultiModalDataBuiltins
-from vllm.multimodal.inputs import PlaceholderRange
 from vllm.sequence import Logprob, SampleLogprobs

 from ....utils import VLLM_PATH, large_gpu_test
-from ...utils import check_logprobs_close, dummy_hf_overrides
+from ...utils import check_logprobs_close

 if TYPE_CHECKING:
    from _typeshed import StrPath
@@ -185,47 +184,3 @@ def test_chat(vllm_runner, max_model_len: int, model: str, dtype: str,
                         outputs_1_lst=logprobs,
                         name_0="h100_ref",
                         name_1="output")
-
-
-@pytest.mark.parametrize(
-    "image_urls,expected_ranges",
-    [(IMG_URLS[:1], [PlaceholderRange(offset=11, length=494)]),
-     (IMG_URLS[1:4], [
-         PlaceholderRange(offset=11, length=266),
-         PlaceholderRange(offset=277, length=1056),
-         PlaceholderRange(offset=1333, length=418)
-     ])])
-def test_multi_modal_placeholders(vllm_runner, image_urls: list[str],
-                                  expected_ranges: list[PlaceholderRange],
-                                  local_asset_server, monkeypatch) -> None:
-    local_image_urls = [local_asset_server.url_for(u) for u in image_urls]
-    prompt = _create_engine_inputs_hf(local_image_urls)
-
-    # This placeholder checking test only works with V0 engine
-    # where `multi_modal_placeholders` is returned with `RequestOutput`
-    monkeypatch.setenv("VLLM_USE_V1", "0")
-    with vllm_runner(
-            "mistral-community/pixtral-12b",
-            max_model_len=8192,
-            limit_mm_per_prompt=LIMIT_MM_PER_PROMPT,
-            load_format="dummy",
-            hf_overrides=dummy_hf_overrides,
-    ) as vllm_model:
-        outputs = vllm_model.llm.generate(prompt)
-
-        assert len(outputs) == 1, f"{len(outputs)=}"
-        output: RequestOutput = outputs[0]
-        assert hasattr(output,
-                       "multi_modal_placeholders"), f"{output.__dict__=}"
-        assert "image" in output.multi_modal_placeholders, \
-            f"{output.multi_modal_placeholders.keys()=}"
-        image_placeholder_ranges: list[
-            PlaceholderRange] = output.multi_modal_placeholders["image"]
-        assert len(image_placeholder_ranges) == len(
-            expected_ranges), f"{image_placeholder_ranges=}"
-        for real_range, expected_range in zip(image_placeholder_ranges,
-                                              expected_ranges):
-            assert real_range.offset == expected_range.offset, \
-                f"{real_range=} {expected_range=}"
-            assert real_range.length == expected_range.length, \
-                f"{real_range=} {expected_range=}"