[Frontend] Introduce Renderer for processing chat messages (using ModelConfig) (#30200)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
Author: Cyrus Leung
Date: 2026-01-22 20:44:22 +08:00
Committed by: GitHub
Commit: d117a4d1a9
Parent: 421012b63a
48 changed files with 2141 additions and 1585 deletions


@@ -5,7 +5,13 @@ import pytest
 from vllm.assets.image import ImageAsset
 from vllm.assets.video import VideoAsset
-from vllm.config import CacheConfig, DeviceConfig, ModelConfig, VllmConfig
+from vllm.config import (
+    CacheConfig,
+    DeviceConfig,
+    ModelConfig,
+    MultiModalConfig,
+    VllmConfig,
+)
 from vllm.multimodal import MultiModalRegistry, MultiModalUUIDDict
 from vllm.sampling_params import SamplingParams
 from vllm.v1.engine.input_processor import InputProcessor
@@ -44,27 +50,22 @@ def _mock_input_processor(
     monkeypatch.setattr(VllmConfig, "__post_init__", lambda self: None, raising=True)
     model_config = ModelConfig(
+        tokenizer="dummy",
         skip_tokenizer_init=True,
         max_model_len=128,
-        mm_processor_cache_gb=mm_cache_gb,
         generation_config="vllm",
-        tokenizer="dummy",
     )
     model_config.runner_type = "generate"
+    model_config.multimodal_config = MultiModalConfig(mm_processor_cache_gb=mm_cache_gb)
-    # Minimal multimodal_config to satisfy references in
-    # Processor.process_inputs.
-    class _MockMMConfig:
-        def __init__(self, gb: float):
-            self.mm_processor_cache_gb = gb
-    model_config.multimodal_config = _MockMMConfig(mm_cache_gb)  # type: ignore[attr-defined]
     vllm_config = VllmConfig(
         model_config=model_config,
         cache_config=CacheConfig(enable_prefix_caching=enable_prefix_caching),
         device_config=DeviceConfig(device="cpu"),
     )
-    return InputProcessor(vllm_config, tokenizer=None)
+    return InputProcessor(vllm_config)
 def test_multi_modal_uuids_length_mismatch_raises(monkeypatch):
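
Pieced together from the two hunks above, the mocked helper after this change likely reads as follows. This is a reconstruction, not a copy from the repository: the full signature of _mock_input_processor (parameter order and the type annotations) is an assumption; everything inside the body is taken from the added lines of the diff.

# Reconstructed sketch of the test helper after this commit.
# The signature and annotations below are assumptions pieced together from the
# diff hunks; the body mirrors the added lines shown above.
import pytest

from vllm.config import (
    CacheConfig,
    DeviceConfig,
    ModelConfig,
    MultiModalConfig,
    VllmConfig,
)
from vllm.v1.engine.input_processor import InputProcessor


def _mock_input_processor(
    monkeypatch: pytest.MonkeyPatch,
    mm_cache_gb: float,
    enable_prefix_caching: bool,
) -> InputProcessor:
    # Skip VllmConfig.__post_init__ so the dummy configs below are accepted as-is.
    monkeypatch.setattr(VllmConfig, "__post_init__", lambda self: None, raising=True)

    model_config = ModelConfig(
        tokenizer="dummy",
        skip_tokenizer_init=True,
        max_model_len=128,
        generation_config="vllm",
    )
    model_config.runner_type = "generate"
    # The ad-hoc _MockMMConfig stand-in from the old test is replaced by the
    # real MultiModalConfig, now that it is imported from vllm.config.
    model_config.multimodal_config = MultiModalConfig(mm_processor_cache_gb=mm_cache_gb)

    vllm_config = VllmConfig(
        model_config=model_config,
        cache_config=CacheConfig(enable_prefix_caching=enable_prefix_caching),
        device_config=DeviceConfig(device="cpu"),
    )
    # After this commit, InputProcessor no longer takes an explicit tokenizer argument.
    return InputProcessor(vllm_config)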