feat: expose media_io_kwargs at runtime (#34778)
Signed-off-by: Alexandre Milesi <milesial@users.noreply.github.com>
This commit is contained in:
@@ -35,6 +35,8 @@ def server():
|
||||
"--trust-remote-code",
|
||||
"--limit-mm-per-prompt",
|
||||
json.dumps({"video": MAXIMUM_VIDEOS}),
|
||||
"--media-io-kwargs",
|
||||
json.dumps({"video": {"num_frames": 32}}),
|
||||
]
|
||||
|
||||
# ROCm: Increase timeouts to handle potential network delays and slower
|
||||
@@ -127,6 +129,73 @@ async def test_single_chat_session_video(
|
||||
assert message.content is not None and len(message.content) >= 0
|
||||
|
||||
|
||||
@pytest.mark.asyncio
@pytest.mark.parametrize("model_name", [MODEL_NAME])
@pytest.mark.parametrize("video_url", [TEST_VIDEO_URLS[0]])
async def test_request_media_io_kwargs_override_uses_fewer_video_frames(
    client: openai.AsyncOpenAI, model_name: str, video_url: str
):
    """Check that a per-request ``media_io_kwargs`` override shrinks the prompt.

    The same video chat request is sent twice: once without any override and
    once with ``media_io_kwargs`` asking for 4 video frames. The overridden
    request must report strictly fewer prompt tokens.

    NOTE(review): the server arguments shown in this change pass
    ``--media-io-kwargs`` with ``num_frames`` 32 for video; presumably that is
    the baseline this test compares against -- confirm against the fixture.
    """
    chat_messages = dummy_messages_from_video_url(video_url)

    # Request-level override sent through the OpenAI client's extra_body.
    frame_override = {
        "media_io_kwargs": {
            "video": {
                "num_frames": 4,
            }
        }
    }

    # Baseline request: no per-request media_io_kwargs.
    baseline_resp = await client.chat.completions.create(
        model=model_name,
        messages=chat_messages,
        max_completion_tokens=1,
        temperature=0.0,
    )

    # Same request, but with the frame-count override attached.
    reduced_resp = await client.chat.completions.create(
        model=model_name,
        messages=chat_messages,
        max_completion_tokens=1,
        temperature=0.0,
        extra_body=frame_override,
    )

    baseline_usage = baseline_resp.usage
    reduced_usage = reduced_resp.usage

    assert baseline_usage is not None
    assert reduced_usage is not None
    assert reduced_usage.prompt_tokens < baseline_usage.prompt_tokens
|
||||
|
||||
|
||||
@pytest.mark.asyncio
@pytest.mark.parametrize("model_name", [MODEL_NAME])
@pytest.mark.parametrize("video_url", [TEST_VIDEO_URLS[0]])
async def test_invalid_num_frames_request_recoverable(
    client: openai.AsyncOpenAI, model_name: str, video_url: str
):
    """Check that a bad ``num_frames`` override fails without breaking the server.

    A request whose video ``num_frames`` is the string ``"invalid"`` must raise
    an OpenAI API status error. A follow-up request without the override must
    then still return a message with non-``None`` content.
    """
    chat_messages = dummy_messages_from_video_url(video_url)

    # Deliberately malformed override: num_frames is a string, not a number.
    malformed_override = {
        "media_io_kwargs": {
            "video": {
                "num_frames": "invalid",
            }
        }
    }

    expected_errors = (openai.BadRequestError, openai.APIStatusError)
    with pytest.raises(expected_errors):
        await client.chat.completions.create(
            model=model_name,
            messages=chat_messages,
            max_completion_tokens=1,
            temperature=0.0,
            extra_body=malformed_override,
        )

    # Server should still handle subsequent requests after the failed one.
    followup_resp = await client.chat.completions.create(
        model=model_name,
        messages=chat_messages,
        max_completion_tokens=1,
        temperature=0.0,
    )
    followup_content = followup_resp.choices[0].message.content
    assert followup_content is not None
    # Mirrors the sanity check used by the other chat tests in this file.
    assert len(followup_content) >= 0
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@pytest.mark.parametrize("model_name", [MODEL_NAME])
|
||||
@pytest.mark.parametrize("video_url", TEST_VIDEO_URLS)
|
||||
|
||||
@@ -127,6 +127,39 @@ def test_chat_image_base64_request(server: RemoteOpenAIServer, model_name: str):
|
||||
assert output.usage.prompt_tokens == 767
|
||||
|
||||
|
||||
@pytest.mark.parametrize("model_name", [MODEL_NAME])
def test_chat_image_with_media_io_kwargs(server: RemoteOpenAIServer, model_name: str):
    """Check the embeddings endpoint accepts request-level image ``media_io_kwargs``.

    Posts a chat-style embeddings request that contains an RGBA image URL and
    an ``rgba_background_color`` image option, then verifies the response
    holds exactly one embedding with 3072 dimensions.
    """
    rgba_image_url = (
        "https://vllm-public-assets.s3.us-west-2.amazonaws.com"
        "/vision_model_images/RGBA_comp.png"
    )

    user_turn = {
        "role": "user",
        "content": [
            {"type": "text", "text": "Represent the user's input."},
            {"type": "image_url", "image_url": {"url": rgba_image_url}},
        ],
    }

    # Request body: the image options travel alongside the messages.
    payload = {
        "model": model_name,
        "messages": [user_turn],
        "media_io_kwargs": {
            "image": {"rgba_background_color": [0, 0, 0]},
        },
    }

    http_response = requests.post(server.url_for("v1/embeddings"), json=payload)
    # Surface any HTTP error status before attempting to parse the body.
    http_response.raise_for_status()

    parsed = EmbeddingResponse.model_validate(http_response.json())
    assert len(parsed.data) == 1
    assert len(parsed.data[0].embedding) == 3072
|
||||
|
||||
|
||||
def get_hf_prompt_tokens(model_name, content, image_url):
|
||||
processor = AutoProcessor.from_pretrained(
|
||||
model_name, trust_remote_code=True, num_crops=4
|
||||
|
||||
Reference in New Issue
Block a user