[Multimodal][torch.compile] Add compilation config field for turning off ViT/MM compile (#28242)

Signed-off-by: Lucas Kabela <lucaskabela@meta.com>
2025-11-06 16:16:03 -08:00
parent 59b453eaa2
commit 4bf56c79cc
4 changed files with 60 additions and 3 deletions
--- a/tests/compile/test_multimodal_compile.py
+++ b/tests/compile/test_multimodal_compile.py
@@ -3,10 +3,17 @@
 import pytest

 from vllm.compilation.counter import compilation_counter
+from vllm.config import VllmConfig
 from vllm.config.compilation import CompilationMode
 from vllm.platforms import current_platform


+def test_compile():
+    vllm_config = VllmConfig()
+    # Default configuration compiles mm encoder
+    assert vllm_config.compilation_config.compile_mm_encoder
+
+
 # forked needed to workaround https://github.com/vllm-project/vllm/issues/21073
@pytest.mark.forked
@pytest.mark.skipif(not current_platform.is_cuda(), reason="Skip if not cuda")
@@ -31,8 +38,33 @@ def test_qwen2_5_vl_compilation(vllm_runner, monkeypatch):
        vllm_runner(
            "Qwen/Qwen2.5-VL-3B-Instruct",
            max_model_len=2048,
-            gpu_memory_utilization=0.7,
+            gpu_memory_utilization=0.8,
            compilation_config={"mode": CompilationMode.VLLM_COMPILE},
        ) as _,
    ):
        pass
+
+
+# forked needed to workaround https://github.com/vllm-project/vllm/issues/21073
+@pytest.mark.forked
+@pytest.mark.skipif(not current_platform.is_cuda(), reason="Skip if not cuda")
+def test_qwen2_5_vl_no_vit_compilation(vllm_runner, monkeypatch):
+    """Test that Qwen2.5-VL vision submodules are not compiled when the
+    config is passed off
+    """
+    # Disable multiprocessing so that the counter is in the same process
+    monkeypatch.setenv("VLLM_ENABLE_V1_MULTIPROCESSING", "0")
+
+    with (
+        compilation_counter.expect(num_models_seen=1),
+        vllm_runner(
+            "Qwen/Qwen2.5-VL-3B-Instruct",
+            max_model_len=2048,
+            gpu_memory_utilization=0.8,
+            compilation_config={
+                "mode": CompilationMode.VLLM_COMPILE,
+                "compile_mm_encoder": False,
+            },
+        ) as _,
+    ):
+        pass