[Renderer] Separate out RendererConfig from ModelConfig (#30145)
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in:
@@ -3,7 +3,6 @@
|
||||
|
||||
import pytest
|
||||
|
||||
from vllm.config import ModelConfig
|
||||
from vllm.entrypoints.chat_utils import apply_hf_chat_template, load_chat_template
|
||||
from vllm.entrypoints.openai.protocol import ChatCompletionRequest
|
||||
from vllm.tokenizers import get_tokenizer
|
||||
@@ -107,24 +106,11 @@ def test_get_gen_prompt(
|
||||
model_info = HF_EXAMPLE_MODELS.find_hf_info(model)
|
||||
model_info.check_available_online(on_fail="skip")
|
||||
|
||||
model_config = ModelConfig(
|
||||
model,
|
||||
tokenizer=model_info.tokenizer or model,
|
||||
tokenizer_mode=model_info.tokenizer_mode,
|
||||
trust_remote_code=model_info.trust_remote_code,
|
||||
revision=model_info.revision,
|
||||
hf_overrides=model_info.hf_overrides,
|
||||
skip_tokenizer_init=model_info.require_embed_inputs,
|
||||
enable_prompt_embeds=model_info.require_embed_inputs,
|
||||
enable_mm_embeds=model_info.require_embed_inputs,
|
||||
enforce_eager=model_info.enforce_eager,
|
||||
dtype=model_info.dtype,
|
||||
)
|
||||
renderer_config = model_info.build_renderer_config(model)
|
||||
|
||||
# Initialize the tokenizer
|
||||
tokenizer = get_tokenizer(
|
||||
tokenizer_name=model_config.tokenizer,
|
||||
trust_remote_code=model_config.trust_remote_code,
|
||||
renderer_config.tokenizer,
|
||||
trust_remote_code=renderer_config.trust_remote_code,
|
||||
)
|
||||
template_content = load_chat_template(chat_template=template)
|
||||
|
||||
@@ -143,7 +129,7 @@ def test_get_gen_prompt(
|
||||
tokenizer=tokenizer,
|
||||
conversation=mock_request.messages,
|
||||
chat_template=mock_request.chat_template or template_content,
|
||||
model_config=model_config,
|
||||
renderer_config=renderer_config,
|
||||
tools=None,
|
||||
add_generation_prompt=mock_request.add_generation_prompt,
|
||||
continue_final_message=mock_request.continue_final_message,
|
||||
|
||||
@@ -33,26 +33,34 @@ class MockModelConfig:
|
||||
"""Minimal mock ModelConfig for testing."""
|
||||
|
||||
model: str = MODEL_NAME
|
||||
tokenizer: str = MODEL_NAME
|
||||
trust_remote_code: bool = False
|
||||
tokenizer_mode: str = "auto"
|
||||
max_model_len: int = 100
|
||||
tokenizer_revision: str | None = None
|
||||
multimodal_config: MultiModalConfig = field(default_factory=MultiModalConfig)
|
||||
hf_config: MockHFConfig = field(default_factory=MockHFConfig)
|
||||
logits_processors: list[str] | None = None
|
||||
logits_processor_pattern: str | None = None
|
||||
diff_sampling_param: dict | None = None
|
||||
allowed_local_media_path: str = ""
|
||||
allowed_media_domains: list[str] | None = None
|
||||
encoder_config = None
|
||||
generation_config: str = "auto"
|
||||
skip_tokenizer_init: bool = False
|
||||
|
||||
def get_diff_sampling_param(self):
|
||||
return self.diff_sampling_param or {}
|
||||
|
||||
|
||||
@dataclass
|
||||
class MockRendererConfig:
|
||||
"""Minimal mock RendererConfig for testing."""
|
||||
|
||||
model_config: MockModelConfig
|
||||
|
||||
tokenizer: str = MODEL_NAME
|
||||
tokenizer_mode: str = "auto"
|
||||
tokenizer_revision: str | None = None
|
||||
skip_tokenizer_init: bool = False
|
||||
allowed_local_media_path: str = ""
|
||||
allowed_media_domains: list[str] | None = None
|
||||
|
||||
|
||||
class MockLoRAResolver(LoRAResolver):
|
||||
async def resolve_lora(
|
||||
self, base_model_name: str, lora_name: str
|
||||
@@ -114,6 +122,7 @@ def mock_serving_setup():
|
||||
mock_engine.add_lora.reset_mock()
|
||||
|
||||
mock_engine.model_config = MockModelConfig()
|
||||
mock_engine.renderer_config = MockRendererConfig(mock_engine.model_config)
|
||||
mock_engine.input_processor = MagicMock()
|
||||
mock_engine.io_processor = MagicMock()
|
||||
|
||||
|
||||
@@ -346,27 +346,33 @@ class MockHFConfig:
|
||||
class MockModelConfig:
|
||||
task = "generate"
|
||||
runner_type = "generate"
|
||||
tokenizer = MODEL_NAME
|
||||
trust_remote_code = False
|
||||
tokenizer_mode = "auto"
|
||||
max_model_len = 100
|
||||
tokenizer_revision = None
|
||||
multimodal_config = MultiModalConfig()
|
||||
hf_config = MockHFConfig()
|
||||
logits_processors: list[str] | None = None
|
||||
logits_processor_pattern = None
|
||||
diff_sampling_param: dict | None = None
|
||||
allowed_local_media_path: str = ""
|
||||
allowed_media_domains: list[str] | None = None
|
||||
encoder_config = None
|
||||
generation_config: str = "auto"
|
||||
media_io_kwargs: dict[str, dict[str, Any]] = field(default_factory=dict)
|
||||
skip_tokenizer_init = False
|
||||
|
||||
def get_diff_sampling_param(self):
|
||||
return self.diff_sampling_param or {}
|
||||
|
||||
|
||||
@dataclass
|
||||
class MockRendererConfig:
|
||||
model_config: MockModelConfig = field(default_factory=MockModelConfig)
|
||||
|
||||
tokenizer = MODEL_NAME
|
||||
tokenizer_mode = "auto"
|
||||
tokenizer_revision = None
|
||||
skip_tokenizer_init = False
|
||||
media_io_kwargs: dict[str, dict[str, Any]] = field(default_factory=dict)
|
||||
allowed_local_media_path: str = ""
|
||||
allowed_media_domains: list[str] | None = None
|
||||
|
||||
|
||||
def _build_serving_chat(engine: AsyncLLM) -> OpenAIServingChat:
|
||||
models = OpenAIServingModels(
|
||||
engine_client=engine,
|
||||
@@ -399,6 +405,7 @@ def _build_serving_chat(engine: AsyncLLM) -> OpenAIServingChat:
|
||||
@dataclass
|
||||
class MockEngine:
|
||||
model_config: MockModelConfig = field(default_factory=MockModelConfig)
|
||||
renderer_config: MockRendererConfig = field(default_factory=MockRendererConfig)
|
||||
input_processor: MagicMock = field(default_factory=MagicMock)
|
||||
io_processor: MagicMock = field(default_factory=MagicMock)
|
||||
|
||||
@@ -429,6 +436,7 @@ async def test_serving_chat_returns_correct_model_name():
|
||||
mock_engine.get_tokenizer.return_value = get_tokenizer(MODEL_NAME)
|
||||
mock_engine.errored = False
|
||||
mock_engine.model_config = MockModelConfig()
|
||||
mock_engine.renderer_config = MockRendererConfig(mock_engine.model_config)
|
||||
mock_engine.input_processor = MagicMock()
|
||||
mock_engine.io_processor = MagicMock()
|
||||
|
||||
@@ -459,6 +467,7 @@ async def test_serving_chat_should_set_correct_max_tokens():
|
||||
mock_engine.get_tokenizer.return_value = get_tokenizer(MODEL_NAME)
|
||||
mock_engine.errored = False
|
||||
mock_engine.model_config = MockModelConfig()
|
||||
mock_engine.renderer_config = MockRendererConfig(mock_engine.model_config)
|
||||
mock_engine.input_processor = MagicMock()
|
||||
mock_engine.io_processor = MagicMock()
|
||||
|
||||
@@ -492,6 +501,7 @@ async def test_serving_chat_should_set_correct_max_tokens():
|
||||
mock_engine.get_tokenizer.return_value = get_tokenizer(MODEL_NAME)
|
||||
mock_engine.errored = False
|
||||
mock_engine.model_config = mock_model_config
|
||||
mock_engine.renderer_config = MockRendererConfig(mock_model_config)
|
||||
mock_engine.input_processor = MagicMock()
|
||||
mock_engine.io_processor = MagicMock()
|
||||
|
||||
@@ -537,6 +547,7 @@ async def test_serving_chat_should_set_correct_max_tokens():
|
||||
mock_engine.get_tokenizer.return_value = get_tokenizer(MODEL_NAME)
|
||||
mock_engine.errored = False
|
||||
mock_engine.model_config = mock_model_config
|
||||
mock_engine.renderer_config = MockRendererConfig(mock_model_config)
|
||||
mock_engine.input_processor = MagicMock()
|
||||
mock_engine.io_processor = MagicMock()
|
||||
|
||||
@@ -583,6 +594,7 @@ async def test_serving_chat_could_load_correct_generation_config():
|
||||
mock_engine.get_tokenizer.return_value = get_tokenizer(MODEL_NAME)
|
||||
mock_engine.errored = False
|
||||
mock_engine.model_config = mock_model_config
|
||||
mock_engine.renderer_config = MockRendererConfig(mock_model_config)
|
||||
mock_engine.input_processor = MagicMock()
|
||||
mock_engine.io_processor = MagicMock()
|
||||
|
||||
@@ -629,6 +641,7 @@ async def test_serving_chat_did_set_correct_cache_salt(model_type):
|
||||
mock_engine.get_tokenizer.return_value = get_tokenizer(MODEL_NAME)
|
||||
mock_engine.errored = False
|
||||
mock_engine.model_config = mock_model_config
|
||||
mock_engine.renderer_config = MockRendererConfig(mock_model_config)
|
||||
mock_engine.input_processor = MagicMock()
|
||||
mock_engine.io_processor = MagicMock()
|
||||
|
||||
@@ -662,6 +675,7 @@ async def test_serving_chat_data_parallel_rank_extraction():
|
||||
mock_engine.get_tokenizer.return_value = get_tokenizer(MODEL_NAME)
|
||||
mock_engine.errored = False
|
||||
mock_engine.model_config = MockModelConfig()
|
||||
mock_engine.renderer_config = MockRendererConfig(mock_engine.model_config)
|
||||
mock_engine.input_processor = MagicMock()
|
||||
mock_engine.io_processor = MagicMock()
|
||||
|
||||
|
||||
@@ -7,7 +7,7 @@ from unittest.mock import Mock
|
||||
|
||||
import pytest
|
||||
|
||||
from vllm.config import ModelConfig
|
||||
from vllm.config import ModelConfig, RendererConfig
|
||||
from vllm.entrypoints.openai.serving_engine import OpenAIServing
|
||||
from vllm.entrypoints.openai.serving_models import OpenAIServingModels
|
||||
from vllm.tokenizers import MistralTokenizer
|
||||
@@ -19,10 +19,16 @@ def serving() -> OpenAIServing:
|
||||
|
||||
# Create minimal mocks
|
||||
engine_client = Mock()
|
||||
|
||||
model_config = Mock(spec=ModelConfig)
|
||||
model_config.max_model_len = 32768
|
||||
|
||||
renderer_config = Mock(spec=RendererConfig)
|
||||
renderer_config.model_config = model_config
|
||||
|
||||
models = Mock(spec=OpenAIServingModels)
|
||||
models.model_config = model_config
|
||||
models.renderer_config = renderer_config
|
||||
models.input_processor = Mock()
|
||||
models.io_processor = Mock()
|
||||
|
||||
|
||||
@@ -6,7 +6,7 @@ from unittest.mock import MagicMock
|
||||
|
||||
import pytest
|
||||
|
||||
from vllm.config import ModelConfig
|
||||
from vllm.config import ModelConfig, RendererConfig
|
||||
from vllm.engine.protocol import EngineClient
|
||||
from vllm.entrypoints.openai.protocol import (
|
||||
ErrorResponse,
|
||||
@@ -27,9 +27,15 @@ LORA_UNLOADING_SUCCESS_MESSAGE = (
|
||||
async def _async_serving_models_init() -> OpenAIServingModels:
|
||||
mock_engine_client = MagicMock(spec=EngineClient)
|
||||
# Set the max_model_len attribute to avoid missing attribute
|
||||
|
||||
mock_model_config = MagicMock(spec=ModelConfig)
|
||||
mock_model_config.max_model_len = 2048
|
||||
|
||||
mock_renderer_config = MagicMock(spec=RendererConfig)
|
||||
mock_renderer_config.model_config = mock_model_config
|
||||
|
||||
mock_engine_client.model_config = mock_model_config
|
||||
mock_engine_client.renderer_config = mock_renderer_config
|
||||
mock_engine_client.input_processor = MagicMock()
|
||||
mock_engine_client.io_processor = MagicMock()
|
||||
|
||||
|
||||
Reference in New Issue
Block a user