[CI/Build] Refactor image test assets (#5821)

This commit is contained in:
Cyrus Leung
2024-06-26 16:02:34 +08:00
committed by GitHub
parent 3439c5a8e3
commit 6984c02a27
5 changed files with 127 additions and 92 deletions

View File

@@ -10,7 +10,7 @@ from ..conftest import _STR_DTYPE_TO_TORCH_DTYPE
@pytest.mark.parametrize("dtype", ["half", "float"])
def test_clip_image_processor(hf_images, dtype):
def test_clip_image_processor(image_assets, dtype):
MODEL_NAME = "llava-hf/llava-1.5-7b-hf"
IMAGE_HEIGHT = IMAGE_WIDTH = 560
@@ -35,13 +35,13 @@ def test_clip_image_processor(hf_images, dtype):
image_processor_revision=None,
)
for image in hf_images:
for asset in image_assets:
hf_result = hf_processor.preprocess(
image,
asset.pil_image,
return_tensors="pt",
).to(dtype=_STR_DTYPE_TO_TORCH_DTYPE[dtype])
vllm_result = MULTIMODAL_REGISTRY.process_input(
ImagePixelData(image),
ImagePixelData(asset.pil_image),
model_config=model_config,
vlm_config=vlm_config,
)
@@ -59,7 +59,7 @@ def test_clip_image_processor(hf_images, dtype):
reason="Inconsistent image processor being used due to lack "
"of support for dynamic image token replacement")
@pytest.mark.parametrize("dtype", ["half", "float"])
def test_llava_next_image_processor(hf_images, dtype):
def test_llava_next_image_processor(image_assets, dtype):
MODEL_NAME = "llava-hf/llava-v1.6-34b-hf"
IMAGE_HEIGHT = IMAGE_WIDTH = 560
@@ -84,13 +84,13 @@ def test_llava_next_image_processor(hf_images, dtype):
image_processor_revision=None,
)
for image in hf_images:
for asset in image_assets:
hf_result = hf_processor.preprocess(
image,
asset.pil_image,
return_tensors="pt",
).to(dtype=_STR_DTYPE_TO_TORCH_DTYPE[dtype])
vllm_result = MULTIMODAL_REGISTRY.process_input(
ImagePixelData(image),
ImagePixelData(asset.pil_image),
model_config=model_config,
vlm_config=vlm_config,
)
@@ -107,7 +107,7 @@ def test_llava_next_image_processor(hf_images, dtype):
@pytest.mark.xfail(
reason="Example image pixels were not processed using HuggingFace")
@pytest.mark.parametrize("dtype", ["float"])
def test_image_pixel_types(hf_images, vllm_image_tensors, dtype):
def test_image_pixel_types(image_assets, dtype):
MODEL_NAME = "llava-hf/llava-1.5-7b-hf"
IMAGE_HEIGHT = IMAGE_WIDTH = 560
@@ -129,14 +129,14 @@ def test_image_pixel_types(hf_images, vllm_image_tensors, dtype):
image_processor_revision=None,
)
for image, tensor in zip(hf_images, vllm_image_tensors):
for asset in image_assets:
image_result = MULTIMODAL_REGISTRY.process_input(
ImagePixelData(image),
ImagePixelData(asset.pil_image),
model_config=model_config,
vlm_config=vlm_config,
)
tensor_result = MULTIMODAL_REGISTRY.process_input(
ImagePixelData(tensor),
ImagePixelData(asset.pixel_values),
model_config=model_config,
vlm_config=vlm_config,
)