[Core] Dynamic image size support for VLMs (#5276)

Signed-off-by: Xiaowei Jiang <xwjiang2010@gmail.com>
Co-authored-by: Xiaowei Jiang <xwjiang2010@gmail.com>
Co-authored-by: ywang96 <ywang@roblox.com>
Co-authored-by: xwjiang2010 <87673679+xwjiang2010@users.noreply.github.com>
Co-authored-by: Roger Wang <136131678+ywang96@users.noreply.github.com>
This commit is contained in:
Cyrus Leung
2024-07-03 11:34:00 +08:00
committed by GitHub
parent 482045ee77
commit 9831aec49f
38 changed files with 1453 additions and 664 deletions

View File

@@ -5,10 +5,9 @@ from typing import Dict, Tuple
import numpy as np
import pytest
import pytest_asyncio
from PIL import Image
from vllm.multimodal.utils import ImageFetchAiohttp
from vllm.multimodal.utils import ImageFetchAiohttp, fetch_image
# Test different image extensions (JPG/PNG) and formats (gray/RGB/RGBA)
TEST_IMAGE_URLS = [
@@ -19,12 +18,9 @@ TEST_IMAGE_URLS = [
]
@pytest_asyncio.fixture(scope="session")
async def url_images() -> Dict[str, Image.Image]:
return {
image_url: await ImageFetchAiohttp.fetch_image(image_url)
for image_url in TEST_IMAGE_URLS
}
@pytest.fixture(scope="module")
def url_images() -> Dict[str, Image.Image]:
return {image_url: fetch_image(image_url) for image_url in TEST_IMAGE_URLS}
def get_supported_suffixes() -> Tuple[str, ...]:
@@ -41,7 +37,15 @@ def _image_equals(a: Image.Image, b: Image.Image) -> bool:
return (np.asarray(a) == np.asarray(b.convert(a.mode))).all()
@pytest.mark.asyncio
@pytest.mark.asyncio(scope="module")
@pytest.mark.parametrize("image_url", TEST_IMAGE_URLS)
async def test_fetch_image_http(image_url: str):
image_sync = fetch_image(image_url)
image_async = await ImageFetchAiohttp.fetch_image(image_url)
assert _image_equals(image_sync, image_async)
@pytest.mark.asyncio(scope="module")
@pytest.mark.parametrize("image_url", TEST_IMAGE_URLS)
@pytest.mark.parametrize("suffix", get_supported_suffixes())
async def test_fetch_image_base64(url_images: Dict[str, Image.Image],
@@ -68,8 +72,11 @@ async def test_fetch_image_base64(url_images: Dict[str, Image.Image],
base64_image = base64.b64encode(f.read()).decode("utf-8")
data_url = f"data:{mime_type};base64,{base64_image}"
data_image = await ImageFetchAiohttp.fetch_image(data_url)
data_image_sync = fetch_image(data_url)
if _image_equals(url_image, Image.open(f)):
assert _image_equals(url_image, data_image)
assert _image_equals(url_image, data_image_sync)
else:
pass # Lossy format; only check that image can be opened
data_image_async = await ImageFetchAiohttp.fetch_image(data_url)
assert _image_equals(data_image_sync, data_image_async)