[VLM] Move supported limits and max tokens to merged multi-modal processor (#11669)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
Signed-off-by: Isotr0py <2037008807@qq.com>
Co-authored-by: Isotr0py <2037008807@qq.com>
2025-01-01 23:44:42 +08:00
parent 73001445fb
commit a115ac46b5
16 changed files with 351 additions and 361 deletions
--- a/tests/multimodal/test_processing.py
+++ b/tests/multimodal/test_processing.py
@@ -538,6 +538,11 @@ def _test_processing_cache_correctness(
    else:
        hf_overrides = {}

+    limit_mm_per_prompt = {
+        modality: 3 if supports_multi else 1
+        for modality, supports_multi in modalities.items()
+    }
+
    model_config = ModelConfig(
        model_id,
        task="auto",
@@ -548,6 +553,7 @@ def _test_processing_cache_correctness(
        dtype="float16",
        revision=None,
        hf_overrides=hf_overrides,
+        limit_mm_per_prompt=limit_mm_per_prompt,
    )
    model_cls = MULTIMODAL_REGISTRY._get_model_cls(model_config)

@@ -580,18 +586,14 @@ def _test_processing_cache_correctness(
                min_wh=128,
                max_wh=256),
        "audio":
-        partial(_rand_audio, rng, min_len=256, max_len=512, sr=16000),
-    }
-    input_max_count = {
-        modality: 3 if supports_multi else 1
-        for modality, supports_multi in modalities.items()
+        partial(_rand_audio, rng, min_len=512, max_len=1024, sr=16000),
    }

    for batch_idx in range(num_batches):
        mm_data = {
            k:
            [(input_to_hit[k] if rng.rand() < hit_rate else input_factory[k]())
-             for _ in range(rng.randint(input_max_count[k]))]
+             for _ in range(rng.randint(limit_mm_per_prompt[k]))]
            for k in modalities
        }