[Bugfix] Standardize getting number of image patches/tokens (#34358)
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in:
@@ -3,7 +3,9 @@
|
||||
"""Tests for Idefics3's multimodal preprocessing kwargs."""
|
||||
|
||||
import pytest
|
||||
from packaging.version import Version
|
||||
from transformers import Idefics3Config
|
||||
from transformers import __version__ as TRANSFORMERS_VERSION
|
||||
|
||||
from vllm.multimodal import MULTIMODAL_REGISTRY
|
||||
|
||||
@@ -11,6 +13,10 @@ from ....conftest import ImageTestAssets
|
||||
from ...utils import build_model_context
|
||||
|
||||
|
||||
@pytest.mark.skipif(
|
||||
Version(TRANSFORMERS_VERSION) < Version("5.2.0"),
|
||||
reason="See https://github.com/huggingface/transformers/pull/43948",
|
||||
)
|
||||
@pytest.mark.parametrize("model_id", ["HuggingFaceM4/Idefics3-8B-Llama3"])
|
||||
@pytest.mark.parametrize(
|
||||
("mm_processor_kwargs", "expected_toks_per_img"),
|
||||
@@ -63,7 +69,11 @@ def test_processor_override(
|
||||
|
||||
# Ensure the placeholder format is correct
|
||||
hf_processor = processor.info.get_hf_processor(**hf_processor_mm_kwargs)
|
||||
hf_processed_inputs = hf_processor(text=prompt, images=mm_data["image"])
|
||||
hf_processed_inputs = hf_processor(
|
||||
text=prompt,
|
||||
images=mm_data["image"],
|
||||
**processor.info.ctx.get_merged_mm_kwargs(hf_processor_mm_kwargs),
|
||||
)
|
||||
assert processed_inputs["prompt_token_ids"] == hf_processed_inputs["input_ids"][0]
|
||||
|
||||
# Ensure we have the right number of placeholders per num_crops size
|
||||
|
||||
Reference in New Issue
Block a user