[Bugfix] Fix max image feature size for Llava-one-vision (#12104)
Signed-off-by: Roger Wang <ywang@roblox.com>
This commit is contained in:
@@ -13,6 +13,67 @@ from vllm.multimodal.utils import cached_get_tokenizer
|
||||
from ...utils import build_model_context
|
||||
|
||||
|
||||
def _validate_image_max_tokens_one(
|
||||
processor: BaseMultiModalProcessor,
|
||||
max_tokens: int,
|
||||
failed_size_excs: list[tuple[ImageSize, Exception]],
|
||||
image_size: ImageSize,
|
||||
) -> None:
|
||||
info = processor.info
|
||||
feature_size = info.get_num_image_tokens(image_width=image_size.width,
|
||||
image_height=image_size.height)
|
||||
|
||||
try:
|
||||
assert feature_size <= max_tokens, f"{feature_size} <= {max_tokens}"
|
||||
except Exception as exc:
|
||||
failed_size_excs.append((image_size, exc))
|
||||
|
||||
|
||||
@pytest.mark.skip("This test takes around 5 minutes to run. "
|
||||
"Comment this out to run it manually.")
|
||||
@pytest.mark.parametrize("model_id", ["llava-hf/llava-v1.6-mistral-7b-hf"])
|
||||
def test_processor_max_tokens(model_id):
|
||||
ctx = build_model_context(
|
||||
model_name=model_id,
|
||||
tokenizer_name=model_id,
|
||||
mm_processor_kwargs=None,
|
||||
limit_mm_per_prompt={"image": 1},
|
||||
)
|
||||
processor = MULTIMODAL_REGISTRY.create_processor(
|
||||
ctx.model_config,
|
||||
tokenizer=cached_get_tokenizer(ctx.model_config.tokenizer),
|
||||
)
|
||||
info = processor.info
|
||||
|
||||
seen_aspect_ratios = set[float]()
|
||||
image_sizes = list[ImageSize]()
|
||||
|
||||
# The aspect ratio of the grid layout is between 1 and 2
|
||||
# NOTE: Assumes that feature size calculation is the same if we
|
||||
# swap the width and height of the image
|
||||
for w, h in itertools.product(range(32, 4096), repeat=2):
|
||||
aspect_ratio = w / h
|
||||
if 1 <= aspect_ratio <= 2 and aspect_ratio not in seen_aspect_ratios:
|
||||
image_sizes.append(ImageSize(w, h))
|
||||
seen_aspect_ratios.add(aspect_ratio)
|
||||
|
||||
failed_size_excs = list[tuple[ImageSize, Exception]]()
|
||||
|
||||
validate_one = partial(
|
||||
_validate_image_max_tokens_one,
|
||||
processor,
|
||||
info.get_max_image_tokens(), # type: ignore
|
||||
failed_size_excs,
|
||||
)
|
||||
pqdm(image_sizes, validate_one, n_jobs=8, desc="Validating image sizes")
|
||||
|
||||
if failed_size_excs:
|
||||
msg = "Found failing image sizes:" \
|
||||
+ "\n========\n".join(f"[{size}]\n{exc}"
|
||||
for size, exc in failed_size_excs)
|
||||
raise AssertionError(msg)
|
||||
|
||||
|
||||
def _validate_image_prompt_replacements_one(
|
||||
processor: BaseMultiModalProcessor,
|
||||
num_imgs: int,
|
||||
|
||||
Reference in New Issue
Block a user