[Frontend] Support configurable mm placeholder strings & flexible video sampling policies via CLI flags. (#20105)
Signed-off-by: Chenheli Hua <huachenheli@outlook.com>
This commit is contained in:
@@ -167,14 +167,14 @@ async def test_fetch_image_error_conversion():
|
||||
@pytest.mark.parametrize("video_url", TEST_VIDEO_URLS)
|
||||
@pytest.mark.parametrize("num_frames", [-1, 32, 1800])
|
||||
async def test_fetch_video_http(video_url: str, num_frames: int):
|
||||
connector = MediaConnector()
|
||||
connector = MediaConnector(
|
||||
media_io_kwargs={"video": {
|
||||
"num_frames": num_frames,
|
||||
}})
|
||||
|
||||
video_sync = connector.fetch_video(video_url, num_frames=num_frames)
|
||||
video_async = await connector.fetch_video_async(video_url,
|
||||
num_frames=num_frames)
|
||||
# Check that the video frames are equal and metadata are same
|
||||
video_sync = connector.fetch_video(video_url)
|
||||
video_async = await connector.fetch_video_async(video_url)
|
||||
assert np.array_equal(video_sync[0], video_async[0])
|
||||
assert video_sync[1] == video_async[1]
|
||||
|
||||
|
||||
# Used for the next two tests related to `merge_and_sort_multimodal_metadata`.
|
||||
|
||||
@@ -4,7 +4,10 @@ import numpy as np
|
||||
import numpy.typing as npt
|
||||
import pytest
|
||||
|
||||
from vllm.multimodal.video import VIDEO_LOADER_REGISTRY, VideoLoader
|
||||
from vllm import envs
|
||||
from vllm.multimodal.image import ImageMediaIO
|
||||
from vllm.multimodal.video import (VIDEO_LOADER_REGISTRY, VideoLoader,
|
||||
VideoMediaIO)
|
||||
|
||||
NUM_FRAMES = 10
|
||||
FAKE_OUTPUT_1 = np.random.rand(NUM_FRAMES, 1280, 720, 3)
|
||||
@@ -40,3 +43,46 @@ def test_video_loader_registry():
|
||||
def test_video_loader_type_doesnt_exist():
    """Loading an unregistered loader name must trip an assertion."""
    unknown_loader_name = "non_existing_video_loader"
    with pytest.raises(AssertionError):
        VIDEO_LOADER_REGISTRY.load(unknown_loader_name)
|
||||
|
||||
|
||||
@VIDEO_LOADER_REGISTRY.register("assert_10_frames_1_fps")
class Assert10Frames1FPSVideoLoader(VideoLoader):
    """Stub loader that validates the sampling arguments it is handed.

    Registered under the name ``"assert_10_frames_1_fps"``. It never decodes
    ``data``; it only checks ``num_frames`` and ``fps`` and returns a canned
    array.
    """

    @classmethod
    def load_bytes(
        cls,
        data: bytes,
        num_frames: int = -1,
        fps: float = -1.0,
        **kwargs,
    ) -> npt.NDArray:
        """Return ``FAKE_OUTPUT_2`` after checking the sampling arguments.

        Raises:
            AssertionError: "bad num_frames" when ``num_frames`` is not 10,
                otherwise "bad fps" when ``fps`` is not 1.0.
        """
        # num_frames is checked first, so with both arguments left at their
        # defaults the failure message is "bad num_frames".
        assert num_frames == 10, "bad num_frames"
        assert fps == 1.0, "bad fps"
        return FAKE_OUTPUT_2
|
||||
|
||||
|
||||
def test_video_media_io_kwargs():
    """Check which ``VideoMediaIO`` keyword arguments reach the video loader.

    The ``assert_10_frames_1_fps`` backend is selected for the duration of
    the test; it raises ``AssertionError`` unless it is called with
    ``num_frames == 10`` and ``fps == 1.0``. The backend override on ``envs``
    is undone afterwards so it cannot leak into other tests.
    """
    backend_attr = "VLLM_VIDEO_LOADER_BACKEND"
    not_set = object()
    # Record whether the override already exists as a concrete attribute of
    # the ``envs`` module, so it can be restored exactly (or removed) later.
    previous_backend = vars(envs).get(backend_attr, not_set)
    envs.VLLM_VIDEO_LOADER_BACKEND = "assert_10_frames_1_fps"
    try:
        imageio = ImageMediaIO()

        # Verify that different args pass/fail assertions as expected.
        videoio = VideoMediaIO(imageio, num_frames=10, fps=1.0)
        _ = videoio.load_bytes(b"test")

        # An extra keyword argument is expected to be tolerated.
        videoio = VideoMediaIO(
            imageio,
            num_frames=10,
            fps=1.0,
            not_used="not_used",
        )
        _ = videoio.load_bytes(b"test")

        # No kwargs at all: the loader is expected to reject num_frames.
        with pytest.raises(AssertionError, match="bad num_frames"):
            videoio = VideoMediaIO(imageio)
            _ = videoio.load_bytes(b"test")

        with pytest.raises(AssertionError, match="bad num_frames"):
            videoio = VideoMediaIO(imageio, num_frames=9, fps=1.0)
            _ = videoio.load_bytes(b"test")

        with pytest.raises(AssertionError, match="bad fps"):
            videoio = VideoMediaIO(imageio, num_frames=10, fps=2.0)
            _ = videoio.load_bytes(b"test")
    finally:
        # Undo the global override even when an assertion above fails.
        if previous_backend is not_set:
            delattr(envs, backend_attr)
        else:
            setattr(envs, backend_attr, previous_backend)
|
||||
|
||||
Reference in New Issue
Block a user