[Bugfix] Disallow renderer_num_workers > 1 with mm processor cache (#38418)

Signed-off-by: Bvicii <yizhanhuang2002@gmail.com>
This commit is contained in:
Bvicii
2026-03-28 06:32:52 -07:00
committed by GitHub
parent 2bf5b70ae8
commit bda3eda82d
2 changed files with 35 additions and 0 deletions

View File

@@ -1131,6 +1131,28 @@ def test_needs_dp_coordination(
assert vllm_config.needs_dp_coordinator == expected_needs_coordinator
def test_renderer_num_workers_with_mm_cache():
    """Check how renderer_num_workers interacts with the mm processor cache.

    More than one renderer worker must be rejected while the multimodal
    processor cache is enabled (neither cache type is thread-safe); the
    combination is accepted once the cache is disabled, and a single worker
    is always accepted.
    """
    mm_model = "Qwen/Qwen2-VL-2B-Instruct"

    # Configurations that must be refused: several workers while the cache is
    # active, either through the default size (default cache_gb=4) or through
    # an explicitly requested size.
    rejected_kwargs = (
        {"renderer_num_workers": 4},
        {"renderer_num_workers": 2, "mm_processor_cache_gb": 1.0},
    )
    for kwargs in rejected_kwargs:
        with pytest.raises(ValueError, match="renderer-num-workers"):
            ModelConfig(mm_model, **kwargs)

    # Configurations that must be accepted, paired with the worker count the
    # resulting config is expected to report: several workers with the cache
    # disabled, then a single worker with the default (enabled) cache.
    accepted_cases = (
        ({"renderer_num_workers": 4, "mm_processor_cache_gb": 0}, 4),
        ({"renderer_num_workers": 1}, 1),
    )
    for kwargs, expected_workers in accepted_cases:
        config = ModelConfig(mm_model, **kwargs)
        assert config.renderer_num_workers == expected_workers
def test_eagle_draft_model_config():
"""Test that EagleDraft model config is correctly set."""
target_model_config = ModelConfig(