[Multi Modal] Configurable MM Profiling (#25631)

Signed-off-by: wwl2755 <wangwenlong2755@gmail.com> Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> Co-authored-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
2025-10-03 03:59:10 -07:00
parent 2ed3f20dba
commit 79aa244678
60 changed files with 654 additions and 99 deletions
--- a/tests/models/multimodal/processing/test_mllama4.py
+++ b/tests/models/multimodal/processing/test_mllama4.py
@@ -17,23 +17,23 @@ def test_profiling(model_id: str, max_model_len: int):
    model_config_kwargs = {
        "max_model_len": max_model_len,
    }
+    mm_counts = {"image": 1}
    ctx = build_model_context(
        model_id,
        model_config_kwargs=model_config_kwargs,
-        limit_mm_per_prompt={"image": 1},
+        limit_mm_per_prompt=mm_counts,
    )

-    mm_config = ctx.get_mm_config()
    processor = MULTIMODAL_REGISTRY.create_processor(ctx.model_config)
    profiler = MultiModalProfiler(processor)

    decoder_dummy_data = profiler.get_decoder_dummy_data(
        max_model_len,
-        mm_counts=mm_config.limit_per_prompt,
+        mm_counts=mm_counts,
    )
    dummy_mm_data = processor.dummy_inputs.get_dummy_processor_inputs(
        max_model_len,
-        mm_counts=mm_config.limit_per_prompt,
+        mm_counts=mm_counts,
    )

    hf_config = ctx.get_hf_config(Llama4Config)
@@ -58,7 +58,7 @@ def test_profiling(model_id: str, max_model_len: int):

    profiled_tokens = profiler.get_mm_max_contiguous_tokens(
        max_model_len,
-        mm_counts=mm_config.limit_per_prompt,
+        mm_counts=mm_counts,
    )

    assert total_tokens == profiled_tokens["image"]