[Renderer] Separate out RendererConfig from ModelConfig (#30145)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
Cyrus Leung
2025-12-07 15:15:42 +08:00
committed by GitHub
parent a49d813fa8
commit 27f4c2fd46
105 changed files with 969 additions and 797 deletions
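At a glance: the commit introduces a RendererConfig that carries the tokenizer- and media-handling settings previously read off ModelConfig, plus a back-reference to the remaining model settings. Below is a minimal sketch of that shape, inferred from the MockRendererConfig test doubles in this commit; the real class in vllm/config may differ, and the trust_remote_code delegation is an assumption.

from dataclasses import dataclass, field
from typing import Any

from vllm.config import ModelConfig


@dataclass
class RendererConfig:
    # Inferred field set; names are taken from the test mocks in this commit.
    model_config: ModelConfig
    tokenizer: str | None = None
    tokenizer_mode: str = "auto"
    tokenizer_revision: str | None = None
    skip_tokenizer_init: bool = False
    media_io_kwargs: dict[str, dict[str, Any]] = field(default_factory=dict)
    allowed_local_media_path: str = ""
    allowed_media_domains: list[str] | None = None

    @property
    def trust_remote_code(self) -> bool:
        # The chat-template test reads renderer_config.trust_remote_code;
        # delegating to model_config is an assumption, not confirmed by the diff.
        return self.model_config.trust_remote_code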

View File

@@ -3,7 +3,6 @@
 import pytest

-from vllm.config import ModelConfig
 from vllm.entrypoints.chat_utils import apply_hf_chat_template, load_chat_template
 from vllm.entrypoints.openai.protocol import ChatCompletionRequest
 from vllm.tokenizers import get_tokenizer
@@ -107,24 +106,11 @@ def test_get_gen_prompt(
     model_info = HF_EXAMPLE_MODELS.find_hf_info(model)
     model_info.check_available_online(on_fail="skip")

-    model_config = ModelConfig(
-        model,
-        tokenizer=model_info.tokenizer or model,
-        tokenizer_mode=model_info.tokenizer_mode,
-        trust_remote_code=model_info.trust_remote_code,
-        revision=model_info.revision,
-        hf_overrides=model_info.hf_overrides,
-        skip_tokenizer_init=model_info.require_embed_inputs,
-        enable_prompt_embeds=model_info.require_embed_inputs,
-        enable_mm_embeds=model_info.require_embed_inputs,
-        enforce_eager=model_info.enforce_eager,
-        dtype=model_info.dtype,
-    )
+    renderer_config = model_info.build_renderer_config(model)

     # Initialize the tokenizer
     tokenizer = get_tokenizer(
-        tokenizer_name=model_config.tokenizer,
-        trust_remote_code=model_config.trust_remote_code,
+        renderer_config.tokenizer,
+        trust_remote_code=renderer_config.trust_remote_code,
     )

     template_content = load_chat_template(chat_template=template)
@@ -143,7 +129,7 @@ def test_get_gen_prompt(
         tokenizer=tokenizer,
         conversation=mock_request.messages,
         chat_template=mock_request.chat_template or template_content,
-        model_config=model_config,
+        renderer_config=renderer_config,
         tools=None,
         add_generation_prompt=mock_request.add_generation_prompt,
         continue_final_message=mock_request.continue_final_message,
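The deleted ModelConfig(...) block is absorbed by the new build_renderer_config helper on the registry entry. Only the helper's name appears in the diff; a plausible body, assuming it splits the old keyword arguments between the two configs, might look like the following (hypothetical reconstruction, not the commit's actual code):

def build_renderer_config(self, model: str) -> RendererConfig:
    # Model-level settings stay on ModelConfig...
    model_config = ModelConfig(
        model,
        trust_remote_code=self.trust_remote_code,
        revision=self.revision,
        hf_overrides=self.hf_overrides,
        enable_prompt_embeds=self.require_embed_inputs,
        enable_mm_embeds=self.require_embed_inputs,
        enforce_eager=self.enforce_eager,
        dtype=self.dtype,
    )
    # ...while tokenizer-related settings move onto RendererConfig.
    return RendererConfig(
        model_config=model_config,
        tokenizer=self.tokenizer or model,
        tokenizer_mode=self.tokenizer_mode,
        skip_tokenizer_init=self.require_embed_inputs,
    )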

View File

@@ -33,26 +33,34 @@ class MockModelConfig:
"""Minimal mock ModelConfig for testing."""
model: str = MODEL_NAME
tokenizer: str = MODEL_NAME
trust_remote_code: bool = False
tokenizer_mode: str = "auto"
max_model_len: int = 100
tokenizer_revision: str | None = None
multimodal_config: MultiModalConfig = field(default_factory=MultiModalConfig)
hf_config: MockHFConfig = field(default_factory=MockHFConfig)
logits_processors: list[str] | None = None
logits_processor_pattern: str | None = None
diff_sampling_param: dict | None = None
allowed_local_media_path: str = ""
allowed_media_domains: list[str] | None = None
encoder_config = None
generation_config: str = "auto"
skip_tokenizer_init: bool = False
def get_diff_sampling_param(self):
return self.diff_sampling_param or {}
@dataclass
class MockRendererConfig:
"""Minimal mock RendererConfig for testing."""
model_config: MockModelConfig
tokenizer: str = MODEL_NAME
tokenizer_mode: str = "auto"
tokenizer_revision: str | None = None
skip_tokenizer_init: bool = False
allowed_local_media_path: str = ""
allowed_media_domains: list[str] | None = None
class MockLoRAResolver(LoRAResolver):
async def resolve_lora(
self, base_model_name: str, lora_name: str
@@ -114,6 +122,7 @@ def mock_serving_setup():
     mock_engine.add_lora.reset_mock()

     mock_engine.model_config = MockModelConfig()
+    mock_engine.renderer_config = MockRendererConfig(mock_engine.model_config)
     mock_engine.input_processor = MagicMock()
     mock_engine.io_processor = MagicMock()

View File

@@ -346,27 +346,33 @@ class MockHFConfig:
 class MockModelConfig:
     task = "generate"
     runner_type = "generate"
-    tokenizer = MODEL_NAME
     trust_remote_code = False
-    tokenizer_mode = "auto"
     max_model_len = 100
-    tokenizer_revision = None
     multimodal_config = MultiModalConfig()
     hf_config = MockHFConfig()
     logits_processors: list[str] | None = None
     logits_processor_pattern = None
     diff_sampling_param: dict | None = None
-    allowed_local_media_path: str = ""
-    allowed_media_domains: list[str] | None = None
     encoder_config = None
     generation_config: str = "auto"
-    media_io_kwargs: dict[str, dict[str, Any]] = field(default_factory=dict)
-    skip_tokenizer_init = False

     def get_diff_sampling_param(self):
         return self.diff_sampling_param or {}


+@dataclass
+class MockRendererConfig:
+    model_config: MockModelConfig = field(default_factory=MockModelConfig)
+    tokenizer = MODEL_NAME
+    tokenizer_mode = "auto"
+    tokenizer_revision = None
+    skip_tokenizer_init = False
+    media_io_kwargs: dict[str, dict[str, Any]] = field(default_factory=dict)
+    allowed_local_media_path: str = ""
+    allowed_media_domains: list[str] | None = None


 def _build_serving_chat(engine: AsyncLLM) -> OpenAIServingChat:
     models = OpenAIServingModels(
         engine_client=engine,
@@ -399,6 +405,7 @@ def _build_serving_chat(engine: AsyncLLM) -> OpenAIServingChat:
 @dataclass
 class MockEngine:
     model_config: MockModelConfig = field(default_factory=MockModelConfig)
+    renderer_config: MockRendererConfig = field(default_factory=MockRendererConfig)
     input_processor: MagicMock = field(default_factory=MagicMock)
     io_processor: MagicMock = field(default_factory=MagicMock)
@@ -429,6 +436,7 @@ async def test_serving_chat_returns_correct_model_name():
     mock_engine.get_tokenizer.return_value = get_tokenizer(MODEL_NAME)
     mock_engine.errored = False
     mock_engine.model_config = MockModelConfig()
+    mock_engine.renderer_config = MockRendererConfig(mock_engine.model_config)
     mock_engine.input_processor = MagicMock()
     mock_engine.io_processor = MagicMock()
@@ -459,6 +467,7 @@ async def test_serving_chat_should_set_correct_max_tokens():
     mock_engine.get_tokenizer.return_value = get_tokenizer(MODEL_NAME)
     mock_engine.errored = False
     mock_engine.model_config = MockModelConfig()
+    mock_engine.renderer_config = MockRendererConfig(mock_engine.model_config)
     mock_engine.input_processor = MagicMock()
     mock_engine.io_processor = MagicMock()
@@ -492,6 +501,7 @@ async def test_serving_chat_should_set_correct_max_tokens():
     mock_engine.get_tokenizer.return_value = get_tokenizer(MODEL_NAME)
     mock_engine.errored = False
     mock_engine.model_config = mock_model_config
+    mock_engine.renderer_config = MockRendererConfig(mock_model_config)
     mock_engine.input_processor = MagicMock()
     mock_engine.io_processor = MagicMock()
@@ -537,6 +547,7 @@ async def test_serving_chat_should_set_correct_max_tokens():
     mock_engine.get_tokenizer.return_value = get_tokenizer(MODEL_NAME)
     mock_engine.errored = False
     mock_engine.model_config = mock_model_config
+    mock_engine.renderer_config = MockRendererConfig(mock_model_config)
     mock_engine.input_processor = MagicMock()
     mock_engine.io_processor = MagicMock()
@@ -583,6 +594,7 @@ async def test_serving_chat_could_load_correct_generation_config():
     mock_engine.get_tokenizer.return_value = get_tokenizer(MODEL_NAME)
     mock_engine.errored = False
     mock_engine.model_config = mock_model_config
+    mock_engine.renderer_config = MockRendererConfig(mock_model_config)
     mock_engine.input_processor = MagicMock()
     mock_engine.io_processor = MagicMock()
@@ -629,6 +641,7 @@ async def test_serving_chat_did_set_correct_cache_salt(model_type):
     mock_engine.get_tokenizer.return_value = get_tokenizer(MODEL_NAME)
     mock_engine.errored = False
     mock_engine.model_config = mock_model_config
+    mock_engine.renderer_config = MockRendererConfig(mock_model_config)
     mock_engine.input_processor = MagicMock()
     mock_engine.io_processor = MagicMock()
@@ -662,6 +675,7 @@ async def test_serving_chat_data_parallel_rank_extraction():
     mock_engine.get_tokenizer.return_value = get_tokenizer(MODEL_NAME)
     mock_engine.errored = False
     mock_engine.model_config = MockModelConfig()
+    mock_engine.renderer_config = MockRendererConfig(mock_engine.model_config)
     mock_engine.input_processor = MagicMock()
     mock_engine.io_processor = MagicMock()
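Every test above now wires a renderer_config (built from the same model_config) onto the mock engine alongside the processor mocks. If that four-assignment stanza keeps growing, it could be collapsed into a small helper along these lines (hypothetical, not part of the commit):

def _wire_mock_engine(mock_engine, model_config=None):
    # Bundle the repeated mock setup: pair every model_config with a
    # renderer_config built from it, plus the two processor mocks.
    model_config = model_config or MockModelConfig()
    mock_engine.model_config = model_config
    mock_engine.renderer_config = MockRendererConfig(model_config)
    mock_engine.input_processor = MagicMock()
    mock_engine.io_processor = MagicMock()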

View File

@@ -7,7 +7,7 @@ from unittest.mock import Mock
 import pytest

-from vllm.config import ModelConfig
+from vllm.config import ModelConfig, RendererConfig
 from vllm.entrypoints.openai.serving_engine import OpenAIServing
 from vllm.entrypoints.openai.serving_models import OpenAIServingModels
 from vllm.tokenizers import MistralTokenizer
@@ -19,10 +19,16 @@ def serving() -> OpenAIServing:
     # Create minimal mocks
     engine_client = Mock()
     model_config = Mock(spec=ModelConfig)
     model_config.max_model_len = 32768
+    renderer_config = Mock(spec=RendererConfig)
+    renderer_config.model_config = model_config

     models = Mock(spec=OpenAIServingModels)
     models.model_config = model_config
+    models.renderer_config = renderer_config
+    models.input_processor = Mock()
+    models.io_processor = Mock()

View File

@@ -6,7 +6,7 @@ from unittest.mock import MagicMock
 import pytest

-from vllm.config import ModelConfig
+from vllm.config import ModelConfig, RendererConfig
 from vllm.engine.protocol import EngineClient
 from vllm.entrypoints.openai.protocol import (
     ErrorResponse,
@@ -27,9 +27,15 @@ LORA_UNLOADING_SUCCESS_MESSAGE = (
 async def _async_serving_models_init() -> OpenAIServingModels:
     mock_engine_client = MagicMock(spec=EngineClient)

     # Set the max_model_len attribute to avoid missing attribute
     mock_model_config = MagicMock(spec=ModelConfig)
     mock_model_config.max_model_len = 2048
+    mock_renderer_config = MagicMock(spec=RendererConfig)
+    mock_renderer_config.model_config = mock_model_config

     mock_engine_client.model_config = mock_model_config
+    mock_engine_client.renderer_config = mock_renderer_config
+    mock_engine_client.input_processor = MagicMock()
+    mock_engine_client.io_processor = MagicMock()
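A note on the Mock(spec=...) pattern these fixtures rely on: spec restricts the mock to attribute names that exist on the real class, but every attribute access still returns a child mock, so any value the code under test compares or does arithmetic on (max_model_len here) must be pinned to a real number, and cross-references such as renderer_config.model_config must be wired up by hand. A standalone illustration using only the standard library:

from unittest.mock import MagicMock


class Config:
    max_model_len: int = 2048


mock = MagicMock(spec=Config)
print(type(mock.max_model_len))  # a MagicMock, not an int
mock.max_model_len = 2048        # pin a concrete value, as the fixtures do
print(mock.max_model_len + 1)    # 2049
try:
    mock.not_a_real_field        # spec= rejects unknown attribute names
except AttributeError:
    print("unknown attribute rejected")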