[Bugfix] Disallow renderer_num_workers > 1 with mm processor cache (#38418)
Signed-off-by: Bvicii <yizhanhuang2002@gmail.com>
This commit is contained in:
@@ -1131,6 +1131,28 @@ def test_needs_dp_coordination(
|
||||
assert vllm_config.needs_dp_coordinator == expected_needs_coordinator
|
||||
|
||||
|
||||
def test_renderer_num_workers_with_mm_cache():
    """Check the renderer-worker / multimodal-processor-cache exclusion rule.

    ModelConfig is expected to raise ``ValueError`` when more than one
    renderer worker is requested while the multimodal processor cache is
    enabled, because the cache is not thread-safe. The same worker count
    must be accepted once the cache is disabled, and a single worker must
    be accepted with the cache left at its default.
    """
    model_name = "Qwen/Qwen2-VL-2B-Instruct"

    # Combinations that must be rejected. The first entry relies on the
    # default cache size being positive (4 GB per the original test comment);
    # the second sets an explicit non-zero cache size.
    rejected_kwargs = (
        {"renderer_num_workers": 4},
        {"renderer_num_workers": 2, "mm_processor_cache_gb": 1.0},
    )
    for kwargs in rejected_kwargs:
        with pytest.raises(ValueError, match="renderer-num-workers"):
            ModelConfig(model_name, **kwargs)

    # Combinations that must be accepted, paired with the worker count the
    # resulting config should report: multi-worker with the cache disabled,
    # then single worker with the cache left at its default.
    accepted_cases = (
        ({"renderer_num_workers": 4, "mm_processor_cache_gb": 0}, 4),
        ({"renderer_num_workers": 1}, 1),
    )
    for kwargs, expected_workers in accepted_cases:
        model_config = ModelConfig(model_name, **kwargs)
        assert model_config.renderer_num_workers == expected_workers
|
||||
|
||||
|
||||
def test_eagle_draft_model_config():
|
||||
"""Test that EagleDraft model config is correctly set."""
|
||||
target_model_config = ModelConfig(
|
||||
|
||||
@@ -647,6 +647,19 @@ class ModelConfig:
|
||||
|
||||
self.multimodal_config = MultiModalConfig(**mm_config_kwargs) # type: ignore[arg-type]
|
||||
|
||||
if (
|
||||
self.renderer_num_workers > 1
|
||||
and self.multimodal_config.mm_processor_cache_gb > 0
|
||||
):
|
||||
raise ValueError(
|
||||
"Cannot use --renderer-num-workers > 1 with the "
|
||||
"multimodal processor cache enabled. The cache is "
|
||||
"not thread-safe and does not support concurrent "
|
||||
"renderer workers. Please set "
|
||||
"--renderer-num-workers 1 (the default), or "
|
||||
"disable the cache with --mm-processor-cache-gb 0."
|
||||
)
|
||||
|
||||
# Multimodal GGUF models must use original repo for mm processing
|
||||
if is_gguf(self.tokenizer) and self.is_multimodal_model:
|
||||
raise ValueError(
|
||||
|
||||
Reference in New Issue
Block a user