From e2fd9a2edf0d8d5de329a3470906118d15e29a7f Mon Sep 17 00:00:00 2001 From: Roger Wang Date: Thu, 13 Nov 2025 08:38:08 -0800 Subject: [PATCH] [Misc] Turn off encoder torch compile by default (#28634) Signed-off-by: Roger Wang (cherry picked from commit d3387750f191f3bcf6607db95436147bbccfacb3) --- tests/compile/test_multimodal_compile.py | 9 ++++++--- tests/models/multimodal/generation/test_common.py | 2 ++ vllm/config/compilation.py | 5 +++-- 3 files changed, 11 insertions(+), 5 deletions(-) diff --git a/tests/compile/test_multimodal_compile.py b/tests/compile/test_multimodal_compile.py index b76c29819..621f6a51a 100644 --- a/tests/compile/test_multimodal_compile.py +++ b/tests/compile/test_multimodal_compile.py @@ -10,8 +10,8 @@ from vllm.platforms import current_platform def test_compile(): vllm_config = VllmConfig() - # Default configuration compiles mm encoder - assert vllm_config.compilation_config.compile_mm_encoder + # Default configuration does not compile mm encoder + assert not vllm_config.compilation_config.compile_mm_encoder # forked needed to workaround https://github.com/vllm-project/vllm/issues/21073 @@ -39,7 +39,10 @@ def test_qwen2_5_vl_compilation(vllm_runner, monkeypatch): "Qwen/Qwen2.5-VL-3B-Instruct", max_model_len=2048, gpu_memory_utilization=0.8, - compilation_config={"mode": CompilationMode.VLLM_COMPILE}, + compilation_config={ + "mode": CompilationMode.VLLM_COMPILE, + "compile_mm_encoder": True, + }, ) as _, ): pass diff --git a/tests/models/multimodal/generation/test_common.py b/tests/models/multimodal/generation/test_common.py index 691ea1070..84ccc0bb7 100644 --- a/tests/models/multimodal/generation/test_common.py +++ b/tests/models/multimodal/generation/test_common.py @@ -139,6 +139,7 @@ VLM_TEST_SETTINGS = { prompt_formatter=lambda img_prompt: f"<|im_start|>User\n{img_prompt}<|im_end|>\n<|im_start|>assistant\n", # noqa: E501 img_idx_to_prompt=lambda idx: "<|vision_start|><|image_pad|><|vision_end|>", video_idx_to_prompt=lambda idx: "<|vision_start|><|video_pad|><|vision_end|>", + enforce_eager=False, max_model_len=4096, max_num_seqs=2, auto_cls=AutoModelForImageTextToText, @@ -168,6 +169,7 @@ VLM_TEST_SETTINGS = { VLMTestType.MULTI_IMAGE, VLMTestType.VIDEO, ), + enforce_eager=False, needs_video_metadata=True, prompt_formatter=lambda img_prompt: f"<|im_start|>User\n{img_prompt}<|im_end|>\n<|im_start|>assistant\n", # noqa: E501 img_idx_to_prompt=lambda idx: "<|vision_start|><|image_pad|><|vision_end|>", # noqa: E501 diff --git a/vllm/config/compilation.py b/vllm/config/compilation.py index 30d33c057..4620fb985 100644 --- a/vllm/config/compilation.py +++ b/vllm/config/compilation.py @@ -266,9 +266,10 @@ class CompilationConfig: If None, defaults to attention ops for piecewise cudagraphs. If empty list [], no ops are excluded (suitable for full cudagraphs).""" - compile_mm_encoder: bool = True + compile_mm_encoder: bool = False """Whether or not to compile the multimodal encoder. - Currently, this only works for `Qwen2_5_vl`.""" + Currently, this only works for `Qwen2_5_vl` on selected platforms. + Disabled by default until more models are supported/tested to work.""" # Inductor capture use_inductor: bool | None = None