[Bugfix] Fix max image feature size for Llava-one-vision (#12104)

Signed-off-by: Roger Wang <ywang@roblox.com>
This commit is contained in:
Roger Wang
2025-01-16 06:54:06 -08:00
committed by GitHub
parent 92e793d91a
commit 874f7c292a
3 changed files with 129 additions and 2 deletions

View File

@@ -13,6 +13,68 @@ from vllm.multimodal.utils import cached_get_tokenizer
from ...utils import build_model_context
def _validate_image_max_tokens_one(
processor: BaseMultiModalProcessor,
max_tokens: int,
failed_size_excs: list[tuple[ImageSize, Exception]],
image_size: ImageSize,
) -> None:
info = processor.info
feature_size = info.get_num_image_tokens(image_width=image_size.width,
image_height=image_size.height)
try:
assert feature_size <= max_tokens, f"{feature_size} <= {max_tokens}"
except Exception as exc:
failed_size_excs.append((image_size, exc))
@pytest.mark.skip("This test takes around 5 minutes to run. "
"Comment this out to run it manually.")
@pytest.mark.parametrize("model_id",
["llava-hf/llava-onevision-qwen2-0.5b-ov-hf"])
def test_processor_max_tokens(model_id):
ctx = build_model_context(
model_name=model_id,
tokenizer_name=model_id,
mm_processor_kwargs=None,
limit_mm_per_prompt={"image": 1},
)
processor = MULTIMODAL_REGISTRY.create_processor(
ctx.model_config,
tokenizer=cached_get_tokenizer(ctx.model_config.tokenizer),
)
info = processor.info
seen_aspect_ratios = set[float]()
image_sizes = list[ImageSize]()
# The aspect ratio of the grid layout is between 1 and 6
# NOTE: Assumes that feature size calculation is the same if we
# swap the width and height of the image
for w, h in itertools.product(range(32, 4096), repeat=2):
aspect_ratio = w / h
if 1 <= aspect_ratio <= 6 and aspect_ratio not in seen_aspect_ratios:
image_sizes.append(ImageSize(w, h))
seen_aspect_ratios.add(aspect_ratio)
failed_size_excs = list[tuple[ImageSize, Exception]]()
validate_one = partial(
_validate_image_max_tokens_one,
processor,
info.get_max_image_tokens(), # type: ignore
failed_size_excs,
)
pqdm(image_sizes, validate_one, n_jobs=8, desc="Validating image sizes")
if failed_size_excs:
msg = "Found failing image sizes:" \
+ "\n========\n".join(f"[{size}]\n{exc}"
for size, exc in failed_size_excs)
raise AssertionError(msg)
def _validate_image_prompt_replacements_one(
processor: BaseMultiModalProcessor,
num_imgs: int,