[Renderer] Separate out RendererConfig from ModelConfig (#30145)
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in:
@@ -6,7 +6,7 @@ import numpy as np
|
||||
import pytest
|
||||
import torch
|
||||
|
||||
from vllm.config import ModelConfig, ParallelConfig, VllmConfig
|
||||
from vllm.config import ModelConfig, ParallelConfig, RendererConfig, VllmConfig
|
||||
from vllm.multimodal import MULTIMODAL_REGISTRY
|
||||
from vllm.multimodal.cache import (
|
||||
BaseMultiModalProcessorCache,
|
||||
@@ -110,11 +110,14 @@ def _create_vllm_config(
|
||||
mm_processor_cache_gb: float,
|
||||
enable_ipc: bool,
|
||||
):
|
||||
model_config = ModelConfig(
|
||||
model="llava-hf/llava-onevision-qwen2-0.5b-ov-hf",
|
||||
mm_processor_cache_gb=mm_processor_cache_gb,
|
||||
)
|
||||
|
||||
return VllmConfig(
|
||||
model_config=ModelConfig(
|
||||
model="llava-hf/llava-onevision-qwen2-0.5b-ov-hf",
|
||||
mm_processor_cache_gb=mm_processor_cache_gb,
|
||||
),
|
||||
model_config=model_config,
|
||||
renderer_config=RendererConfig(model_config=model_config),
|
||||
parallel_config=ParallelConfig(data_parallel_size=1 if enable_ipc else 2),
|
||||
)
|
||||
|
||||
@@ -506,13 +509,15 @@ def _run_test_cache_eviction_shm(
|
||||
|
||||
|
||||
def test_cache_eviction_shm_cache():
|
||||
model_config = ModelConfig(
|
||||
model="llava-hf/llava-onevision-qwen2-0.5b-ov-hf",
|
||||
mm_processor_cache_type="shm",
|
||||
mm_shm_cache_max_object_size_mb=6,
|
||||
mm_processor_cache_gb=15.2 * MiB_bytes / GiB_bytes,
|
||||
)
|
||||
vllm_config = VllmConfig(
|
||||
model_config=ModelConfig(
|
||||
model="llava-hf/llava-onevision-qwen2-0.5b-ov-hf",
|
||||
mm_processor_cache_type="shm",
|
||||
mm_shm_cache_max_object_size_mb=6,
|
||||
mm_processor_cache_gb=15.2 * MiB_bytes / GiB_bytes,
|
||||
),
|
||||
model_config=model_config,
|
||||
renderer_config=RendererConfig(model_config=model_config),
|
||||
)
|
||||
sender_cache = ShmObjectStoreSenderCache(vllm_config)
|
||||
receiver_cache = ShmObjectStoreReceiverCache(vllm_config, mp.Lock())
|
||||
|
||||
@@ -7,7 +7,7 @@ from contextlib import nullcontext
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from vllm.config import ModelConfig
|
||||
from vllm.config import ModelConfig, RendererConfig
|
||||
from vllm.multimodal import MULTIMODAL_REGISTRY
|
||||
from vllm.multimodal.processing import (
|
||||
InputProcessingContext,
|
||||
@@ -920,8 +920,9 @@ def test_limit_mm_per_prompt_dummy(model_id, limit, num_supported, is_valid):
|
||||
model=model_id,
|
||||
limit_mm_per_prompt=limit_mm_per_prompt,
|
||||
)
|
||||
renderer_config = RendererConfig(model_config=model_config)
|
||||
|
||||
processor = MULTIMODAL_REGISTRY.create_processor(model_config)
|
||||
processor = MULTIMODAL_REGISTRY.create_processor(renderer_config)
|
||||
processor._supported_mm_limits = {"image": num_supported}
|
||||
|
||||
profiler = MultiModalProfiler(processor)
|
||||
@@ -955,8 +956,9 @@ def test_limit_mm_per_prompt_apply(model_id, num_images, limit, is_valid):
|
||||
model=model_id,
|
||||
limit_mm_per_prompt=limit_mm_per_prompt,
|
||||
)
|
||||
renderer_config = RendererConfig(model_config=model_config)
|
||||
|
||||
processor = MULTIMODAL_REGISTRY.create_processor(model_config)
|
||||
processor = MULTIMODAL_REGISTRY.create_processor(renderer_config)
|
||||
|
||||
rng = np.random.RandomState(0)
|
||||
image = random_image(rng, min_wh=128, max_wh=256)
|
||||
@@ -1012,11 +1014,13 @@ def test_hf_processor_init_kwargs(
|
||||
inference_kwargs,
|
||||
expected_kwargs,
|
||||
):
|
||||
ctx = InputProcessingContext(
|
||||
model_config=ModelConfig(model_id, mm_processor_kwargs=config_kwargs),
|
||||
tokenizer=None,
|
||||
model_config = ModelConfig(model_id, mm_processor_kwargs=config_kwargs)
|
||||
renderer_config = RendererConfig(
|
||||
model_config=model_config,
|
||||
tokenizer=model_id,
|
||||
)
|
||||
|
||||
ctx = InputProcessingContext.from_config(renderer_config)
|
||||
processor = ctx.get_hf_processor(
|
||||
DummyProcessor, # type: ignore[arg-type]
|
||||
**inference_kwargs,
|
||||
@@ -1045,11 +1049,13 @@ def test_hf_processor_call_kwargs(
|
||||
inference_kwargs,
|
||||
expected_kwargs,
|
||||
):
|
||||
ctx = InputProcessingContext(
|
||||
model_config=ModelConfig(model_id, mm_processor_kwargs=config_kwargs),
|
||||
tokenizer=None,
|
||||
model_config = ModelConfig(model_id, mm_processor_kwargs=config_kwargs)
|
||||
renderer_config = RendererConfig(
|
||||
model_config=model_config,
|
||||
tokenizer=model_id,
|
||||
)
|
||||
|
||||
ctx = InputProcessingContext.from_config(renderer_config)
|
||||
processor = ctx.get_hf_processor(DummyProcessor) # type: ignore[arg-type]
|
||||
|
||||
result = ctx.call_hf_processor(processor, {}, inference_kwargs)
|
||||
|
||||
@@ -31,4 +31,6 @@ def test_supports_multimodal_inputs(model_id, limit_mm_per_prompt, expected):
|
||||
model_id,
|
||||
limit_mm_per_prompt=limit_mm_per_prompt,
|
||||
)
|
||||
assert MULTIMODAL_REGISTRY.supports_multimodal_inputs(ctx.model_config) is expected
|
||||
assert (
|
||||
MULTIMODAL_REGISTRY.supports_multimodal_inputs(ctx.renderer_config) is expected
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user