[Refactor] Relocate completion and chat completion tests (#37125)

Signed-off-by: sfeng33 <4florafeng@gmail.com>
This commit is contained in:
Flora Feng
2026-03-16 23:31:23 -04:00
committed by GitHub
parent f04d5226f8
commit 384dc7f77b
26 changed files with 41 additions and 48 deletions

View File

@@ -0,0 +1,687 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import json
import openai
import pytest
import pytest_asyncio
from transformers import AutoProcessor
from tests.utils import ROCM_ENV_OVERRIDES, ROCM_EXTRA_ARGS, RemoteOpenAIServer
from vllm.multimodal.media import MediaWithBytes
from vllm.multimodal.utils import encode_image_url, fetch_image
from vllm.platforms import current_platform
# Vision-language model under test and the per-prompt image cap the server
# is started with (requests above the cap must be rejected).
MODEL_NAME = "microsoft/Phi-3.5-vision-instruct"
MAXIMUM_IMAGES = 2

# Test different image extensions (JPG/PNG) and formats (gray/RGB/RGBA)
TEST_IMAGE_ASSETS = [
    "2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg",  # "https://vllm-public-assets.s3.us-west-2.amazonaws.com/vision_model_images/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg"
    "Grayscale_8bits_palette_sample_image.png",  # "https://vllm-public-assets.s3.us-west-2.amazonaws.com/vision_model_images/Grayscale_8bits_palette_sample_image.png",
    "1280px-Venn_diagram_rgb.svg.png",  # "https://vllm-public-assets.s3.us-west-2.amazonaws.com/vision_model_images/1280px-Venn_diagram_rgb.svg.png",
    "RGBA_comp.png",  # "https://vllm-public-assets.s3.us-west-2.amazonaws.com/vision_model_images/RGBA_comp.png",
]

# Required terms for beam search validation
# Each entry is a list of term groups - ALL groups must match
# Each group is a list of alternatives - at least ONE term in the group must appear
# This provides semantic validation while allowing wording variation
# NOTE(review): entries are paired with TEST_IMAGE_ASSETS by index, but entry 1
# ("parrot"/"bird" + "two") does not obviously describe asset 1 (a grayscale
# palette sample) and entry 3 ("gradient") may not describe asset 3
# (RGBA_comp.png) — confirm the intended asset/term pairing.
REQUIRED_BEAM_SEARCH_TERMS = [
    # Boardwalk image: must have "boardwalk" AND ("wooden" or "wood")
    [["boardwalk"], ["wooden", "wood"]],
    # Parrots image: must have ("parrot" or "bird") AND "two"
    [["parrot", "bird"], ["two"]],
    # Venn diagram: must have "venn" AND "diagram"
    [["venn"], ["diagram"]],
    # Gradient image: must have "gradient" AND ("color" or "spectrum")
    [["gradient"], ["color", "spectrum"]],
]
def check_output_matches_terms(content: str, term_groups: list[list[str]]) -> bool:
    """Return True when *content* satisfies every term group.

    A group is satisfied when at least one of its alternatives appears in
    *content* (case-insensitive); every group must be satisfied.
    """
    haystack = content.lower()
    for group in term_groups:
        if not any(alternative.lower() in haystack for alternative in group):
            return False
    return True
def assert_non_empty_content(chat_completion, *, context: str = "") -> str:
    """Assert the first choice carries non-empty string content; return it.

    Failure messages embed the finish reason, the full message and the usage
    from the response so flaky / model-quality issues are easy to diagnose.
    """
    prefix = f"[{context}] " if context else ""
    choice = chat_completion.choices[0]
    content = choice.message.content
    # Shared diagnostic tail used by the None / empty failure messages.
    details = (
        f"finish_reason={choice.finish_reason!r}, "
        f"full message={choice.message!r}, "
        f"usage={chat_completion.usage!r}"
    )
    assert content is not None, (
        f"{prefix}Expected non-None content but got None. {details}"
    )
    assert isinstance(content, str), (
        f"{prefix}Expected str content, got {type(content).__name__}: {content!r}"
    )
    assert len(content) > 0, (
        f"{prefix}Expected non-empty content but got empty string. {details}"
    )
    return content
@pytest.fixture(scope="module")
def server():
    """Module-scoped vLLM OpenAI-compatible server running MODEL_NAME.

    The server is started with eager execution, a small context/batch budget
    and a per-prompt image cap of MAXIMUM_IMAGES (requests exceeding the cap
    are expected to fail with a 400 in the tests below).
    """
    args = [
        "--runner",
        "generate",
        "--max-model-len",
        "2048",
        "--max-num-seqs",
        "5",
        "--enforce-eager",
        "--trust-remote-code",
        # Cap images per prompt; serialized as JSON for the CLI flag.
        "--limit-mm-per-prompt",
        json.dumps({"image": MAXIMUM_IMAGES}),
        *ROCM_EXTRA_ARGS,
    ]
    # ROCm: Increase timeouts to handle potential network delays and slower
    # video processing when downloading multiple videos from external sources
    env_overrides = {
        **ROCM_ENV_OVERRIDES,
        **(
            {
                "VLLM_VIDEO_FETCH_TIMEOUT": "120",
                "VLLM_ENGINE_ITERATION_TIMEOUT_S": "300",
            }
            if current_platform.is_rocm()
            else {}
        ),
    }
    with RemoteOpenAIServer(MODEL_NAME, args, env_dict=env_overrides) as remote_server:
        yield remote_server
@pytest_asyncio.fixture
async def client(server):
    """Async OpenAI client bound to the module-scoped test server."""
    async with server.get_async_client() as async_client:
        yield async_client
@pytest.fixture(scope="session")
def url_encoded_image(local_asset_server) -> dict[str, str]:
    """Map each test asset name to its base64 data-URL encoding.

    Session-scoped so the (potentially large) encodings are computed once.
    """
    return {
        image_asset: encode_image_url(local_asset_server.get_image_asset(image_asset))
        for image_asset in TEST_IMAGE_ASSETS
    }
def dummy_messages_from_image_url(
image_urls: str | list[str],
content_text: str = "What's in this image?",
):
if isinstance(image_urls, str):
image_urls = [image_urls]
return [
{
"role": "user",
"content": [
*(
{"type": "image_url", "image_url": {"url": image_url}}
for image_url in image_urls
),
{"type": "text", "text": content_text},
],
}
]
def describe_image_messages(
image_url: str, *, extra_image_fields: dict | None = None
) -> list[dict]:
"""Build the system + user messages used by the completions-with-image
family of tests. *extra_image_fields* is merged into the top-level
image content block (for uuid / bad-key tests)."""
image_block: dict = {
"type": "image_url",
"image_url": {"url": image_url},
}
if extra_image_fields:
image_block.update(extra_image_fields)
return [
{"role": "system", "content": "You are a helpful assistant."},
{
"role": "user",
"content": [
{"type": "text", "text": "Describe this image."},
image_block,
],
},
]
async def complete_and_check(
    client: openai.AsyncOpenAI,
    model_name: str,
    messages: list[dict],
    *,
    context: str,
    max_completion_tokens: int = 50,
    temperature: float = 0.0,
) -> str:
    """Run one chat completion and assert it produced non-empty content.

    Returns the content string of the first choice; *context* is threaded
    into the failure message for easier triage.
    """
    response = await client.chat.completions.create(
        model=model_name,
        messages=messages,
        max_completion_tokens=max_completion_tokens,
        temperature=temperature,
    )
    return assert_non_empty_content(response, context=context)
def get_hf_prompt_tokens(model_name, content, image_url):
    """Compute the HF-side prompt token count for a single-image chat prompt.

    Applies the model's chat template to a user turn containing the
    "<|image_1|>" placeholder plus *content*, fetches the image, and runs the
    processor to obtain the tokenized prompt length. Used to cross-check the
    server's reported prompt_tokens usage.
    """
    processor = AutoProcessor.from_pretrained(
        model_name, trust_remote_code=True, num_crops=4
    )
    chat = [
        {
            "role": "user",
            "content": f"<|image_1|>\n{content}",
        }
    ]
    image = fetch_image(image_url)
    # Unwrap MediaWithBytes if present
    if isinstance(image, MediaWithBytes):
        image = image.media
    rendered = processor.tokenizer.apply_chat_template(
        chat, tokenize=False, add_generation_prompt=True
    )
    encoded = processor(rendered, [image], return_tensors="pt")
    return encoded.input_ids.shape[1]
@pytest.mark.asyncio
@pytest.mark.parametrize("model_name", [MODEL_NAME])
@pytest.mark.parametrize("image_url", TEST_IMAGE_ASSETS, indirect=True)
async def test_single_chat_session_image(
    client: openai.AsyncOpenAI, model_name: str, image_url: str
):
    """End-to-end single-image chat session.

    Verifies: exactly one choice, length-capped finish reason, token usage
    matching the HF processor's count, non-trivial assistant content, and a
    follow-up turn in the same conversation.
    """
    content_text = "What's in this image?"
    messages = dummy_messages_from_image_url(image_url, content_text)
    max_completion_tokens = 10
    chat_completion = await client.chat.completions.create(
        model=model_name,
        messages=messages,
        max_completion_tokens=max_completion_tokens,
        logprobs=True,
        temperature=0.0,
        top_logprobs=5,
    )
    assert len(chat_completion.choices) == 1, (
        f"Expected 1 choice, got {len(chat_completion.choices)}"
    )
    choice = chat_completion.choices[0]
    # With a 10-token cap the model cannot finish naturally, so the stop
    # reason must be "length".
    assert choice.finish_reason == "length", (
        f"Expected finish_reason='length' (capped at {max_completion_tokens} "
        f"tokens), got {choice.finish_reason!r}. "
        f"content={choice.message.content!r}"
    )
    # Cross-check server-reported usage against HF-side tokenization.
    hf_prompt_tokens = get_hf_prompt_tokens(model_name, content_text, image_url)
    expected_usage = openai.types.CompletionUsage(
        completion_tokens=max_completion_tokens,
        prompt_tokens=hf_prompt_tokens,
        total_tokens=hf_prompt_tokens + max_completion_tokens,
    )
    assert chat_completion.usage == expected_usage, (
        f"Usage mismatch: got {chat_completion.usage!r}, expected {expected_usage!r}"
    )
    message = choice.message
    assert message.content is not None and len(message.content) >= 10, (
        f"Expected content with >=10 chars, got {message.content!r}"
    )
    assert message.role == "assistant", (
        f"Expected role='assistant', got {message.role!r}"
    )
    messages.append({"role": "assistant", "content": message.content})
    # test multi-turn dialogue
    messages.append({"role": "user", "content": "express your result in json"})
    await complete_and_check(
        client,
        model_name,
        messages,
        context=f"multi-turn follow-up for {image_url}",
        max_completion_tokens=10,
    )
@pytest.mark.asyncio
@pytest.mark.parametrize("model_name", [MODEL_NAME])
@pytest.mark.parametrize("image_url", TEST_IMAGE_ASSETS, indirect=True)
async def test_error_on_invalid_image_url_type(
    client: openai.AsyncOpenAI, model_name: str, image_url: str
):
    """A bare string under "image_url" (instead of {"url": ...}) must be
    rejected with a 400 BadRequestError."""
    malformed_content = [
        {"type": "image_url", "image_url": image_url},
        {"type": "text", "text": "What's in this image?"},
    ]
    # image_url should be a dict {"url": "some url"}, not directly a string
    with pytest.raises(openai.BadRequestError):
        await client.chat.completions.create(
            model=model_name,
            messages=[{"role": "user", "content": malformed_content}],
            max_completion_tokens=10,
            temperature=0.0,
        )
@pytest.mark.asyncio
@pytest.mark.parametrize("model_name", [MODEL_NAME])
@pytest.mark.parametrize("image_url", TEST_IMAGE_ASSETS, indirect=True)
async def test_single_chat_session_image_beamsearch(
    client: openai.AsyncOpenAI, model_name: str, image_url: str
):
    """Beam search (n=2) over an image prompt must yield two distinct
    candidate outputs."""
    messages = dummy_messages_from_image_url(image_url, "What's in this image?")
    chat_completion = await client.chat.completions.create(
        model=model_name,
        messages=messages,
        n=2,
        max_completion_tokens=10,
        logprobs=True,
        top_logprobs=5,
        extra_body=dict(use_beam_search=True),
    )
    assert len(chat_completion.choices) == 2, (
        f"Expected 2 beam search choices, got {len(chat_completion.choices)}"
    )
    first = chat_completion.choices[0].message.content
    second = chat_completion.choices[1].message.content
    assert first != second, (
        f"Beam search should produce different outputs for {image_url}, "
        f"but both returned: {first!r}"
    )
@pytest.mark.asyncio
@pytest.mark.parametrize("model_name", [MODEL_NAME])
@pytest.mark.parametrize("raw_image_url", TEST_IMAGE_ASSETS)
@pytest.mark.parametrize("image_url", TEST_IMAGE_ASSETS, indirect=True)
async def test_single_chat_session_image_base64encoded(
    client: openai.AsyncOpenAI,
    model_name: str,
    raw_image_url: str,
    image_url: str,
    url_encoded_image: dict[str, str],
):
    """Same checks as test_single_chat_session_image, but the image is sent
    as a base64 data URL.

    NOTE(review): "raw_image_url" and "image_url" are parametrized
    independently, producing a full cross product. The prompt embeds
    url_encoded_image[raw_image_url], yet the expected prompt-token count is
    computed from image_url below — when the two assets differ, the usage
    assertion compares token counts for different images. Confirm whether
    both parameters are intended to refer to the same asset.
    """
    content_text = "What's in this image?"
    messages = dummy_messages_from_image_url(
        url_encoded_image[raw_image_url],
        content_text,
    )
    max_completion_tokens = 10
    # test single completion
    chat_completion = await client.chat.completions.create(
        model=model_name,
        messages=messages,
        max_completion_tokens=max_completion_tokens,
        logprobs=True,
        temperature=0.0,
        top_logprobs=5,
    )
    assert len(chat_completion.choices) == 1, (
        f"Expected 1 choice, got {len(chat_completion.choices)}"
    )
    choice = chat_completion.choices[0]
    assert choice.finish_reason == "length", (
        f"Expected finish_reason='length', got {choice.finish_reason!r}. "
        f"content={choice.message.content!r}"
    )
    # Cross-check server-reported usage against HF-side tokenization.
    hf_prompt_tokens = get_hf_prompt_tokens(model_name, content_text, image_url)
    expected_usage = openai.types.CompletionUsage(
        completion_tokens=max_completion_tokens,
        prompt_tokens=hf_prompt_tokens,
        total_tokens=hf_prompt_tokens + max_completion_tokens,
    )
    assert chat_completion.usage == expected_usage, (
        f"Usage mismatch: got {chat_completion.usage!r}, expected {expected_usage!r}"
    )
    message = choice.message
    assert message.content is not None and len(message.content) >= 10, (
        f"Expected content with >=10 chars, got {message.content!r}"
    )
    assert message.role == "assistant", (
        f"Expected role='assistant', got {message.role!r}"
    )
    messages.append({"role": "assistant", "content": message.content})
    # test multi-turn dialogue
    messages.append({"role": "user", "content": "express your result in json"})
    await complete_and_check(
        client,
        model_name,
        messages,
        context=f"multi-turn base64 follow-up for {raw_image_url}",
        max_completion_tokens=10,
        temperature=0.0,
    )
@pytest.mark.asyncio
@pytest.mark.parametrize("model_name", [MODEL_NAME])
@pytest.mark.parametrize("image_idx", list(range(len(TEST_IMAGE_ASSETS))))
async def test_single_chat_session_image_base64encoded_beamsearch(
    client: openai.AsyncOpenAI,
    model_name: str,
    image_idx: int,
    url_encoded_image: dict[str, str],
):
    """Beam search over a base64-encoded image prompt.

    Checks that both beams are non-empty, distinct, and semantically match
    the per-image required terms (presumably chosen to match what the model
    says about each asset — see the NOTE on REQUIRED_BEAM_SEARCH_TERMS).
    """
    # NOTE: This test validates that we pass MM data through beam search
    raw_image_url = TEST_IMAGE_ASSETS[image_idx]
    required_terms = REQUIRED_BEAM_SEARCH_TERMS[image_idx]
    messages = dummy_messages_from_image_url(url_encoded_image[raw_image_url])
    chat_completion = await client.chat.completions.create(
        model=model_name,
        messages=messages,
        n=2,
        max_completion_tokens=10,
        temperature=0.0,
        extra_body=dict(use_beam_search=True),
    )
    assert len(chat_completion.choices) == 2, (
        f"Expected 2 beam search choices for image {image_idx} "
        f"({raw_image_url}), got {len(chat_completion.choices)}"
    )
    # Verify beam search produces two different non-empty outputs
    content_0 = chat_completion.choices[0].message.content
    content_1 = chat_completion.choices[1].message.content
    # Emit beam search outputs for debugging
    print(
        f"Beam search outputs for image {image_idx} ({raw_image_url}): "
        f"Output 0: {content_0!r}, Output 1: {content_1!r}"
    )
    assert content_0, (
        f"First beam output is empty for image {image_idx} ({raw_image_url}). "
        f"finish_reason={chat_completion.choices[0].finish_reason!r}"
    )
    assert content_1, (
        f"Second beam output is empty for image {image_idx} "
        f"({raw_image_url}). "
        f"finish_reason={chat_completion.choices[1].finish_reason!r}"
    )
    assert content_0 != content_1, (
        f"Beam search produced identical outputs for image {image_idx} "
        f"({raw_image_url}): {content_0!r}"
    )
    # Verify each output contains the required terms for this image
    for i, content in enumerate([content_0, content_1]):
        assert check_output_matches_terms(content, required_terms), (
            f"Beam output {i} for image {image_idx} ({raw_image_url}) "
            f"doesn't match required terms.\n"
            f"  content: {content!r}\n"
            f"  required (all groups, >=1 per group): {required_terms}"
        )
@pytest.mark.asyncio
@pytest.mark.parametrize("model_name", [MODEL_NAME])
@pytest.mark.parametrize("image_url", TEST_IMAGE_ASSETS, indirect=True)
async def test_chat_streaming_image(
    client: openai.AsyncOpenAI, model_name: str, image_url: str
):
    """Streamed output must byte-match the non-streamed output at
    temperature 0, with exactly one finish_reason in the final chunk."""
    messages = dummy_messages_from_image_url(image_url)
    # Non-streamed reference completion (same params as the stream below).
    chat_completion = await client.chat.completions.create(
        model=model_name,
        messages=messages,
        max_completion_tokens=10,
        temperature=0.0,
    )
    output = chat_completion.choices[0].message.content
    stop_reason = chat_completion.choices[0].finish_reason
    # test streaming
    stream = await client.chat.completions.create(
        model=model_name,
        messages=messages,
        max_completion_tokens=10,
        temperature=0.0,
        stream=True,
    )
    chunks: list[str] = []
    finish_reason_count = 0
    async for chunk in stream:
        delta = chunk.choices[0].delta
        if delta.role:
            assert delta.role == "assistant", (
                f"Expected role='assistant' in stream delta, got {delta.role!r}"
            )
        if delta.content:
            chunks.append(delta.content)
        if chunk.choices[0].finish_reason is not None:
            finish_reason_count += 1
    # finish reason should only return in last block
    assert finish_reason_count == 1, (
        f"Expected exactly 1 finish_reason across stream chunks, "
        f"got {finish_reason_count}"
    )
    # `chunk` still refers to the final chunk after the loop exits.
    assert chunk.choices[0].finish_reason == stop_reason, (
        f"Stream finish_reason={chunk.choices[0].finish_reason!r} "
        f"doesn't match non-stream finish_reason={stop_reason!r}"
    )
    streamed_text = "".join(chunks)
    assert streamed_text == output, (
        f"Streamed output doesn't match non-streamed for {image_url}.\n"
        f"  streamed: {streamed_text!r}\n"
        f"  non-streamed: {output!r}"
    )
@pytest.mark.asyncio
@pytest.mark.parametrize("model_name", [MODEL_NAME])
@pytest.mark.parametrize(
    "image_urls",
    [TEST_IMAGE_ASSETS[:i] for i in range(2, len(TEST_IMAGE_ASSETS))],
    indirect=True,
)
async def test_multi_image_input(
    client: openai.AsyncOpenAI, model_name: str, image_urls: list[str]
):
    """Prompts within the MAXIMUM_IMAGES cap succeed; prompts above it get a
    400, and the server must keep serving afterwards."""
    messages = dummy_messages_from_image_url(image_urls)
    if len(image_urls) > MAXIMUM_IMAGES:
        with pytest.raises(openai.BadRequestError):  # test multi-image input
            await client.chat.completions.create(
                model=model_name,
                messages=messages,
                max_completion_tokens=10,
                temperature=0.0,
            )
        # the server should still work afterwards
        completion = await client.completions.create(
            model=model_name,
            # Raw token-ID prompt; content doesn't matter, only liveness.
            prompt=[0, 0, 0, 0, 0],
            max_tokens=5,
            temperature=0.0,
        )
        assert completion.choices[0].text is not None, (
            "Server failed to produce output after rejecting over-limit "
            "multi-image request"
        )
    else:
        await complete_and_check(
            client,
            model_name,
            messages,
            context=f"multi-image input ({len(image_urls)} images)",
            max_completion_tokens=10,
            temperature=0.0,
        )
@pytest.mark.asyncio
@pytest.mark.parametrize("model_name", [MODEL_NAME])
@pytest.mark.parametrize(
    "image_urls",
    [TEST_IMAGE_ASSETS[:i] for i in range(2, len(TEST_IMAGE_ASSETS))],
    indirect=True,
)
async def test_completions_with_image(
    client: openai.AsyncOpenAI,
    model_name: str,
    image_urls: list[str],
):
    """Each image in the batch must yield a non-empty description."""
    for url in image_urls:
        await complete_and_check(
            client,
            model_name,
            describe_image_messages(url),
            context=f"completions_with_image url={url}",
        )
@pytest.mark.asyncio
@pytest.mark.parametrize("model_name", [MODEL_NAME])
@pytest.mark.parametrize(
    "image_urls",
    [TEST_IMAGE_ASSETS[:i] for i in range(2, len(TEST_IMAGE_ASSETS))],
    indirect=True,
)
async def test_completions_with_image_with_uuid(
    client: openai.AsyncOpenAI,
    model_name: str,
    image_urls: list[str],
):
    """Image blocks tagged with a uuid can later be referenced by uuid alone.

    First request sends the real image plus a uuid; the second sends an empty
    image_url with the same uuid and must still succeed (presumably served
    from the server-side media cache — ordering of the two calls matters).
    """
    for image_url in image_urls:
        # Prime the cache: full image + uuid tag.
        messages = describe_image_messages(
            image_url,
            extra_image_fields={"uuid": image_url},
        )
        await complete_and_check(
            client,
            model_name,
            messages,
            context=f"uuid first request url={image_url}",
        )
        # Reference-only request: empty image_url, same uuid.
        cached_messages: list[dict] = [
            {"role": "system", "content": "You are a helpful assistant."},
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": "Describe this image."},
                    {"type": "image_url", "image_url": {}, "uuid": image_url},
                ],
            },
        ]
        await complete_and_check(
            client,
            model_name,
            cached_messages,
            context=f"uuid cached (empty image) uuid={image_url}",
        )
@pytest.mark.asyncio
@pytest.mark.parametrize("model_name", [MODEL_NAME])
async def test_completions_with_empty_image_with_uuid_without_cache_hit(
    client: openai.AsyncOpenAI,
    model_name: str,
):
    """An empty image_url whose uuid was never seen before must be rejected
    with a 400 BadRequestError."""
    uncached_image_block = {
        "type": "image_url",
        "image_url": {},
        "uuid": "uuid_not_previously_seen",
    }
    request_messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "Describe this image."},
                uncached_image_block,
            ],
        },
    ]
    with pytest.raises(openai.BadRequestError):
        await client.chat.completions.create(
            messages=request_messages,
            model=model_name,
        )
@pytest.mark.asyncio
@pytest.mark.parametrize("model_name", [MODEL_NAME])
@pytest.mark.parametrize(
    "image_urls",
    [TEST_IMAGE_ASSETS[:i] for i in range(2, len(TEST_IMAGE_ASSETS))],
    indirect=True,
)
async def test_completions_with_image_with_incorrect_uuid_format(
    client: openai.AsyncOpenAI,
    model_name: str,
    image_urls: list[str],
):
    """Requests carrying unknown uuid-like keys (both on the content block
    and inside the image_url dict) must still complete successfully."""
    for url in image_urls:
        request_messages = describe_image_messages(
            url,
            extra_image_fields={
                "also_incorrect_uuid_key": url,
            },
        )
        # Inject the bad key inside image_url dict too
        request_messages[1]["content"][1]["image_url"]["incorrect_uuid_key"] = url
        await complete_and_check(
            client,
            model_name,
            request_messages,
            context=f"incorrect uuid format url={url}",
        )