[Multimodal][torch.compile] Add compilation config field for turning off ViT/MM compile (#28242)

Signed-off-by: Lucas Kabela <lucaskabela@meta.com>
This commit is contained in:
Lucas Kabela
2025-11-06 16:16:03 -08:00
committed by GitHub
parent 59b453eaa2
commit 4bf56c79cc
4 changed files with 60 additions and 3 deletions

View File

@@ -3,10 +3,17 @@
import pytest
from vllm.compilation.counter import compilation_counter
from vllm.config import VllmConfig
from vllm.config.compilation import CompilationMode
from vllm.platforms import current_platform
def test_compile():
vllm_config = VllmConfig()
# Default configuration compiles mm encoder
assert vllm_config.compilation_config.compile_mm_encoder
# forked needed to workaround https://github.com/vllm-project/vllm/issues/21073
@pytest.mark.forked
@pytest.mark.skipif(not current_platform.is_cuda(), reason="Skip if not cuda")
@@ -31,8 +38,33 @@ def test_qwen2_5_vl_compilation(vllm_runner, monkeypatch):
vllm_runner(
"Qwen/Qwen2.5-VL-3B-Instruct",
max_model_len=2048,
gpu_memory_utilization=0.7,
gpu_memory_utilization=0.8,
compilation_config={"mode": CompilationMode.VLLM_COMPILE},
) as _,
):
pass
# forked needed to workaround https://github.com/vllm-project/vllm/issues/21073
@pytest.mark.forked
@pytest.mark.skipif(not current_platform.is_cuda(), reason="Skip if not cuda")
def test_qwen2_5_vl_no_vit_compilation(vllm_runner, monkeypatch):
"""Test that Qwen2.5-VL vision submodules are not compiled when the
config is passed off
"""
# Disable multiprocessing so that the counter is in the same process
monkeypatch.setenv("VLLM_ENABLE_V1_MULTIPROCESSING", "0")
with (
compilation_counter.expect(num_models_seen=1),
vllm_runner(
"Qwen/Qwen2.5-VL-3B-Instruct",
max_model_len=2048,
gpu_memory_utilization=0.8,
compilation_config={
"mode": CompilationMode.VLLM_COMPILE,
"compile_mm_encoder": False,
},
) as _,
):
pass