[Refactor] Relocate completion and chat completion tests (#37125)

Signed-off-by: sfeng33 <4florafeng@gmail.com>
2026-03-16 23:31:23 -04:00
parent f04d5226f8
commit 384dc7f77b
26 changed files with 41 additions and 48 deletions
--- a/tests/entrypoints/openai/chat_completion/test_audio_in_video.py
+++ b/tests/entrypoints/openai/chat_completion/test_audio_in_video.py
@@ -0,0 +1,175 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+import base64
+import json
+
+import openai
+import pytest
+import pytest_asyncio
+
+from tests.conftest import VideoTestAssets
+from tests.utils import RemoteOpenAIServer
+
+MODEL_NAME = "Qwen/Qwen2.5-Omni-3B"  # served by `server`; requested in every test
+
+
+@pytest.fixture
+def server():
+    """Run a `RemoteOpenAIServer` for MODEL_NAME and yield it to the test."""
+    # Value passed to --limit-mm-per-prompt: 3 audio and 3 video items.
+    mm_limits = json.dumps({"audio": 3, "video": 3})
+    server_args = [
+        "--max-model-len",
+        "16384",
+        "--enforce-eager",
+        "--limit-mm-per-prompt",
+        mm_limits,
+    ]
+
+    with RemoteOpenAIServer(MODEL_NAME, server_args) as remote_server:
+        yield remote_server
+
+
+@pytest_asyncio.fixture
+async def client(server):  # async client from `server`; context exits on teardown
+    async with server.get_async_client() as api_client:
+        yield api_client
+
+
+@pytest.mark.core_model
+@pytest.mark.asyncio
+async def test_online_audio_in_video(
+    client: openai.AsyncOpenAI, video_assets: VideoTestAssets
+):
+    """Request a chat completion for one video with `use_audio_in_video`.
+
+    The request is sent twice; each response must hold exactly one choice
+    whose finish reason is "length".
+    """
+
+    # Inline the local video asset as a base64 data URL (hosted test video
+    # URLs were noted to lack an audio stream).
+    with open(video_assets[0].video_path, "rb") as video_file:
+        encoded_video = base64.b64encode(video_file.read()).decode("utf-8")
+
+    text_item = {"type": "text", "text": "What's in this video?"}
+    video_item = {
+        "type": "video_url",
+        "video_url": {"url": f"data:video/mp4;base64,{encoded_video}"},
+    }
+    messages = [
+        {
+            "role": "user",
+            "content": [text_item, video_item],
+        }
+    ]
+
+    # Repeat the same request so the mm processor cache is tested as well.
+    for _ in range(2):
+        response = await client.chat.completions.create(
+            model=MODEL_NAME,
+            messages=messages,
+            max_tokens=16,
+            extra_body={"mm_processor_kwargs": {"use_audio_in_video": True}},
+        )
+
+        # Generation is expected to stop at the max_tokens cap.
+        choices = response.choices
+        assert len(choices) == 1
+        assert choices[0].finish_reason == "length"
+
+
+@pytest.mark.core_model
+@pytest.mark.asyncio
+async def test_online_audio_in_video_multi_videos(
+    client: openai.AsyncOpenAI, video_assets: VideoTestAssets
+):
+    """Request a chat completion for two videos with `use_audio_in_video`.
+
+    The same video asset is attached twice. The request is sent twice; each
+    response must hold exactly one choice whose finish reason is "length".
+    """
+
+    # Inline the local video asset as a base64 data URL (hosted test video
+    # URLs were noted to lack an audio stream).
+    with open(video_assets[0].video_path, "rb") as video_file:
+        encoded_video = base64.b64encode(video_file.read()).decode("utf-8")
+    data_url = f"data:video/mp4;base64,{encoded_video}"
+
+    messages = [
+        {
+            "role": "user",
+            "content": [
+                {"type": "text", "text": "What's in these two videos?"},
+                {"type": "video_url", "video_url": {"url": data_url}},
+                {"type": "video_url", "video_url": {"url": data_url}},
+            ],
+        }
+    ]
+
+    # Repeat the same request so the mm processor cache is tested as well.
+    for _ in range(2):
+        response = await client.chat.completions.create(
+            model=MODEL_NAME,
+            messages=messages,
+            max_tokens=16,
+            extra_body={
+                "mm_processor_kwargs": {
+                    "use_audio_in_video": True,
+                }
+            },
+        )
+
+        # Generation is expected to stop at the max_tokens cap.
+        choices = response.choices
+        assert len(choices) == 1
+        assert choices[0].finish_reason == "length"
+
+
+@pytest.mark.core_model
+@pytest.mark.asyncio
+async def test_online_audio_in_video_interleaved(
+    client: openai.AsyncOpenAI, video_assets: VideoTestAssets
+):
+    """Expect `BadRequestError` for video/audio/video with `use_audio_in_video`.
+
+    The prompt interleaves two video items with a single audio item, and the
+    request must fail with the "equal number of audio and video items" error.
+    """
+
+    # Inline the local video asset as base64 data URLs (hosted test video
+    # URLs were noted to lack an audio stream).
+    with open(video_assets[0].video_path, "rb") as video_file:
+        encoded_video = base64.b64encode(video_file.read()).decode("utf-8")
+    video_data_url = f"data:video/mp4;base64,{encoded_video}"
+    # The audio item carries the same bytes under an audio/mp4 data URL.
+    audio_data_url = f"data:audio/mp4;base64,{encoded_video}"
+
+    messages = [
+        {
+            "role": "user",
+            "content": [
+                {"type": "text", "text": "What's in these two videos?"},
+                {"type": "video_url", "video_url": {"url": video_data_url}},
+                {"type": "audio_url", "audio_url": {"url": audio_data_url}},
+                {"type": "video_url", "video_url": {"url": video_data_url}},
+            ],
+        }
+    ]
+    error_text = "use_audio_in_video requires equal number of audio and video items"
+
+    with pytest.raises(
+        openai.BadRequestError,
+        match=error_text,
+    ):
+        await client.chat.completions.create(
+            model=MODEL_NAME,
+            messages=messages,
+            max_tokens=16,
+            extra_body={
+                "mm_processor_kwargs": {
+                    "use_audio_in_video": True,
+                }
+            },
+        )