feat: expose media_io_kwargs at runtime (#34778)

Signed-off-by: Alexandre Milesi <milesial@users.noreply.github.com>
This commit is contained in:
milesial
2026-03-06 20:27:04 -08:00
committed by GitHub
parent 58928475e4
commit 755356b3d1
20 changed files with 298 additions and 16 deletions

View File

@@ -35,6 +35,8 @@ def server():
"--trust-remote-code",
"--limit-mm-per-prompt",
json.dumps({"video": MAXIMUM_VIDEOS}),
"--media-io-kwargs",
json.dumps({"video": {"num_frames": 32}}),
]
# ROCm: Increase timeouts to handle potential network delays and slower
@@ -127,6 +129,73 @@ async def test_single_chat_session_video(
assert message.content is not None and len(message.content) >= 0
@pytest.mark.asyncio
@pytest.mark.parametrize("model_name", [MODEL_NAME])
@pytest.mark.parametrize("video_url", [TEST_VIDEO_URLS[0]])
async def test_request_media_io_kwargs_override_uses_fewer_video_frames(
    client: openai.AsyncOpenAI, model_name: str, video_url: str
):
    """Check that a per-request ``media_io_kwargs`` override shrinks the prompt.

    The same video prompt is sent twice: once without any override and once
    with ``num_frames=4`` passed through ``extra_body``. The overridden
    request must report strictly fewer prompt tokens than the baseline.
    """
    # Everything except the per-request override is shared by both calls.
    shared_request = {
        "model": model_name,
        "messages": dummy_messages_from_video_url(video_url),
        "max_completion_tokens": 1,
        "temperature": 0.0,
    }
    frame_override = {"media_io_kwargs": {"video": {"num_frames": 4}}}

    baseline = await client.chat.completions.create(**shared_request)
    reduced = await client.chat.completions.create(
        **shared_request,
        extra_body=frame_override,
    )

    # Usage must be reported for both responses before it can be compared.
    assert baseline.usage is not None
    assert reduced.usage is not None
    assert reduced.usage.prompt_tokens < baseline.usage.prompt_tokens
@pytest.mark.asyncio
@pytest.mark.parametrize("model_name", [MODEL_NAME])
@pytest.mark.parametrize("video_url", [TEST_VIDEO_URLS[0]])
async def test_invalid_num_frames_request_recoverable(
    client: openai.AsyncOpenAI, model_name: str, video_url: str
):
    """Check that a bad ``num_frames`` override fails without breaking the server.

    A request carrying a non-numeric ``num_frames`` must raise an API error
    on the client. A follow-up request without the override must then
    complete and return a message whose content is not ``None``.
    """
    shared_request = {
        "model": model_name,
        "messages": dummy_messages_from_video_url(video_url),
        "max_completion_tokens": 1,
        "temperature": 0.0,
    }
    bad_override = {"media_io_kwargs": {"video": {"num_frames": "invalid"}}}
    expected_errors = (openai.BadRequestError, openai.APIStatusError)

    with pytest.raises(expected_errors):
        await client.chat.completions.create(
            **shared_request,
            extra_body=bad_override,
        )

    # Server should still handle subsequent requests after the failed one.
    follow_up = await client.chat.completions.create(**shared_request)
    reply = follow_up.choices[0].message
    assert reply.content is not None
    # Length check kept as in the original; it only requires that content
    # supports len().
    assert len(reply.content) >= 0
@pytest.mark.asyncio
@pytest.mark.parametrize("model_name", [MODEL_NAME])
@pytest.mark.parametrize("video_url", TEST_VIDEO_URLS)