diff --git a/tests/test_config.py b/tests/test_config.py
index f98b30f99..f07a649ca 100644
--- a/tests/test_config.py
+++ b/tests/test_config.py
@@ -1131,6 +1131,28 @@ def test_needs_dp_coordination(
     assert vllm_config.needs_dp_coordinator == expected_needs_coordinator
 
 
+def test_renderer_num_workers_with_mm_cache():
+    """Disallow renderer_num_workers > 1 when mm processor cache is enabled,
+    since neither cache type is thread-safe."""
+    mm_model = "Qwen/Qwen2-VL-2B-Instruct"
+
+    # Should raise: multi-worker + cache enabled (default cache_gb=4)
+    with pytest.raises(ValueError, match="renderer-num-workers"):
+        ModelConfig(mm_model, renderer_num_workers=4)
+
+    # Should raise: multi-worker + explicit cache size
+    with pytest.raises(ValueError, match="renderer-num-workers"):
+        ModelConfig(mm_model, renderer_num_workers=2, mm_processor_cache_gb=1.0)
+
+    # Should pass: multi-worker + cache disabled
+    config = ModelConfig(mm_model, renderer_num_workers=4, mm_processor_cache_gb=0)
+    assert config.renderer_num_workers == 4
+
+    # Should pass: single worker + cache enabled (default)
+    config = ModelConfig(mm_model, renderer_num_workers=1)
+    assert config.renderer_num_workers == 1
+
+
 def test_eagle_draft_model_config():
     """Test that EagleDraft model config is correctly set."""
     target_model_config = ModelConfig(
diff --git a/vllm/config/model.py b/vllm/config/model.py
index acb43a04b..b8c601334 100644
--- a/vllm/config/model.py
+++ b/vllm/config/model.py
@@ -647,6 +647,19 @@ class ModelConfig:
 
         self.multimodal_config = MultiModalConfig(**mm_config_kwargs)  # type: ignore[arg-type]
 
+        if (
+            self.renderer_num_workers > 1
+            and self.multimodal_config.mm_processor_cache_gb > 0
+        ):
+            raise ValueError(
+                "Cannot use --renderer-num-workers > 1 with the "
+                "multimodal processor cache enabled. The cache is "
+                "not thread-safe and does not support concurrent "
+                "renderer workers. Please set "
+                "--renderer-num-workers 1 (the default), or "
+                "disable the cache with --mm-processor-cache-gb 0."
+            )
+
         # Multimodal GGUF models must use original repo for mm processing
         if is_gguf(self.tokenizer) and self.is_multimodal_model:
             raise ValueError(