feat: expose media_io_kwargs at runtime (#34778)

Signed-off-by: Alexandre Milesi <milesial@users.noreply.github.com>
2026-03-06 20:27:04 -08:00
parent 58928475e4
commit 755356b3d1
20 changed files with 298 additions and 16 deletions
--- a/tests/entrypoints/openai/test_video.py
+++ b/tests/entrypoints/openai/test_video.py
@@ -35,6 +35,8 @@ def server():
        "--trust-remote-code",
        "--limit-mm-per-prompt",
        json.dumps({"video": MAXIMUM_VIDEOS}),
+        "--media-io-kwargs",
+        json.dumps({"video": {"num_frames": 32}}),
    ]

    # ROCm: Increase timeouts to handle potential network delays and slower
@@ -127,6 +129,73 @@ async def test_single_chat_session_video(
    assert message.content is not None and len(message.content) >= 0


+@pytest.mark.asyncio
+@pytest.mark.parametrize("model_name", [MODEL_NAME])
+@pytest.mark.parametrize("video_url", [TEST_VIDEO_URLS[0]])
+async def test_request_media_io_kwargs_override_uses_fewer_video_frames(
+    client: openai.AsyncOpenAI, model_name: str, video_url: str
+):
+    """A request-level ``num_frames`` of 4 must reduce the prompt tokens."""
+    # Everything except ``extra_body`` is identical across the two requests.
+    shared_kwargs = {
+        "model": model_name,
+        "messages": dummy_messages_from_video_url(video_url),
+        "max_completion_tokens": 1,
+        "temperature": 0.0,
+    }
+    frame_override = {
+        "media_io_kwargs": {
+            "video": {"num_frames": 4},
+        },
+    }
+
+    baseline = await client.chat.completions.create(**shared_kwargs)
+    reduced = await client.chat.completions.create(
+        **shared_kwargs,
+        extra_body=frame_override,
+    )
+
+    baseline_usage = baseline.usage
+    reduced_usage = reduced.usage
+    assert baseline_usage is not None
+    assert reduced_usage is not None
+    assert reduced_usage.prompt_tokens < baseline_usage.prompt_tokens
+
+
+@pytest.mark.asyncio
+@pytest.mark.parametrize("model_name", [MODEL_NAME])
+@pytest.mark.parametrize("video_url", [TEST_VIDEO_URLS[0]])
+async def test_invalid_num_frames_request_recoverable(
+    client: openai.AsyncOpenAI, model_name: str, video_url: str
+):
+    """A non-numeric ``num_frames`` must fail; the next request must succeed."""
+    messages = dummy_messages_from_video_url(video_url)
+
+    # BadRequestError is a subclass of APIStatusError, so the base class alone
+    # accepts a 400 as well as any other error status the server returns.
+    with pytest.raises(openai.APIStatusError):
+        await client.chat.completions.create(
+            model=model_name,
+            messages=messages,
+            max_completion_tokens=1,
+            temperature=0.0,
+            extra_body={
+                "media_io_kwargs": {"video": {"num_frames": "invalid"}},
+            },
+        )
+
+    # Server should still handle subsequent requests after the failed one.
+    recovery_resp = await client.chat.completions.create(
+        model=model_name,
+        messages=messages,
+        max_completion_tokens=1,
+        temperature=0.0,
+    )
+    recovery_msg = recovery_resp.choices[0].message
+    # A length check of ``>= 0`` can never fail, so only None is ruled out.
+    assert recovery_msg.content is not None
+
+
@pytest.mark.asyncio
@pytest.mark.parametrize("model_name", [MODEL_NAME])
@pytest.mark.parametrize("video_url", TEST_VIDEO_URLS)
--- a/tests/entrypoints/pooling/embed/test_online_vision.py
+++ b/tests/entrypoints/pooling/embed/test_online_vision.py
@@ -127,6 +127,39 @@ def test_chat_image_base64_request(server: RemoteOpenAIServer, model_name: str):
    assert output.usage.prompt_tokens == 767


+@pytest.mark.parametrize("model_name", [MODEL_NAME])
+def test_chat_image_with_media_io_kwargs(server: RemoteOpenAIServer, model_name: str):
+    """Request an embedding for an RGBA image with image ``media_io_kwargs``."""
+    rgba_image_url = (
+        "https://vllm-public-assets.s3.us-west-2.amazonaws.com"
+        "/vision_model_images/RGBA_comp.png"
+    )
+
+    user_content = [
+        {"type": "text", "text": "Represent the user's input."},
+        {"type": "image_url", "image_url": {"url": rgba_image_url}},
+    ]
+    payload = {
+        "model": model_name,
+        "messages": [{"role": "user", "content": user_content}],
+        # Per-request image I/O override exercised by this test.
+        "media_io_kwargs": {
+            "image": {"rgba_background_color": [0, 0, 0]},
+        },
+    }
+
+    http_resp = requests.post(
+        server.url_for("v1/embeddings"),
+        json=payload,
+    )
+    http_resp.raise_for_status()
+
+    embeddings = EmbeddingResponse.model_validate(http_resp.json()).data
+    # The test expects exactly one embedding holding 3072 values.
+    assert len(embeddings) == 1
+    assert len(embeddings[0].embedding) == 3072
+
+
 def get_hf_prompt_tokens(model_name, content, image_url):
    processor = AutoProcessor.from_pretrained(
        model_name, trust_remote_code=True, num_crops=4