[Renderer] Separate out RendererConfig from ModelConfig (#30145)

Author: Cyrus Leung
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
Date: 2025-12-07 15:15:42 +08:00
Committed by: GitHub
Commit: 27f4c2fd46 (parent a49d813fa8)

105 changed files with 969 additions and 797 deletions
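
In short: rendering-time settings (tokenizer selection and chat-template
handling) move off ModelConfig onto a dedicated RendererConfig, and the call
sites below now receive the renderer config instead of the model config. A
minimal sketch of the split, inferred from the attribute accesses visible in
this diff (any detail beyond the fields exercised below is an assumption, not
the actual definition):

from dataclasses import dataclass


@dataclass
class RendererConfig:
    # Sketch only: these are the fields this diff touches.
    model_config: "ModelConfig"  # back-reference kept for the Mistral path
    tokenizer: str               # tokenizer name or path
    tokenizer_mode: str          # e.g. "auto"
    skip_tokenizer_init: bool = False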

@@ -1099,7 +1099,7 @@ async def init_app_state(
     logger.info("Supported tasks: %s", supported_tasks)
     resolved_chat_template = await process_chat_template(
-        args.chat_template, engine_client, vllm_config.model_config
+        args.chat_template, engine_client, vllm_config.renderer_config
     )
     if args.tool_server == "demo":
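
The updated call implies process_chat_template now resolves the template
against the renderer config rather than the model config; a hedged reading of
the new signature (parameter and return types beyond what the call site shows
are assumptions):

async def process_chat_template(
    chat_template: str | None,
    engine_client: "EngineClient",
    renderer_config: "RendererConfig",
) -> str | None:
    ...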

@@ -122,7 +122,7 @@ class OpenAIServingCompletion(OpenAIServing):
         try:
             lora_request = self._maybe_get_adapters(request)
-            if self.model_config.skip_tokenizer_init:
+            if self.renderer_config.skip_tokenizer_init:
                 tokenizer = None
             else:
                 tokenizer = await self.engine_client.get_tokenizer()
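
The tokenizer can legitimately be None on this path, so downstream code has to
branch before using it. A minimal sketch of the pattern callers need
(count_prompt_tokens is a hypothetical helper, not part of this commit):

def count_prompt_tokens(
    tokenizer,
    prompt: str,
    prompt_token_ids: list[int] | None,
) -> int:
    # Hypothetical helper: when skip_tokenizer_init is set on the renderer
    # config there is no tokenizer, so the request must carry token IDs.
    if tokenizer is None:
        if prompt_token_ids is None:
            raise ValueError(
                "prompt_token_ids is required when tokenizer init is skipped"
            )
        return len(prompt_token_ids)
    return len(tokenizer.encode(prompt))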

@@ -291,6 +291,7 @@ class OpenAIServing:
         self.input_processor = self.models.input_processor
         self.io_processor = self.models.io_processor
+        self.renderer_config = self.models.renderer_config
         self.model_config = self.models.model_config
         self.max_model_len = self.model_config.max_model_len
@@ -1100,18 +1101,18 @@ class OpenAIServing:
         Sequence[RequestPrompt],
         list[EngineTokensPrompt],
     ]:
-        model_config = self.model_config
+        renderer_config = self.renderer_config
         resolved_content_format = resolve_chat_template_content_format(
             chat_template,
             tool_dicts,
             chat_template_content_format,
             tokenizer,
-            model_config=model_config,
+            renderer_config=renderer_config,
         )
         conversation, mm_data_future, mm_uuids = parse_chat_messages_futures(
             messages,
-            model_config,
+            renderer_config,
             content_format=resolved_content_format,
         )
@@ -1138,14 +1139,14 @@ class OpenAIServing:
             request_prompt = tokenizer.apply_chat_template(
                 conversation=conversation,
                 messages=messages,
-                model_config=model_config,
+                model_config=renderer_config.model_config,
                 **_chat_template_kwargs,
             )
         else:
             request_prompt = apply_hf_chat_template(
                 tokenizer=tokenizer,
                 conversation=conversation,
-                model_config=model_config,
+                renderer_config=renderer_config,
                 **_chat_template_kwargs,
             )
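
The last hunk above preserves an asymmetry between the two template paths: the
tokenizer's own apply_chat_template still wants the raw model config, reached
through the new back-reference, while the HF path consumes the renderer config
directly. A condensed sketch of that branch (the MistralTokenizer isinstance
check and the import paths are assumptions; apply_hf_chat_template is taken
from the diff):

from vllm.entrypoints.chat_utils import apply_hf_chat_template  # path assumed
from vllm.transformers_utils.tokenizers import MistralTokenizer  # path assumed


def render_prompt(tokenizer, conversation, renderer_config, **kwargs):
    # Sketch of the branch above; the isinstance check is assumed.
    if isinstance(tokenizer, MistralTokenizer):
        # This path keeps taking the model config, now reached through
        # the renderer config's back-reference.
        return tokenizer.apply_chat_template(
            conversation=conversation,
            model_config=renderer_config.model_config,
            **kwargs,
        )
    # The HF path takes the renderer config directly.
    return apply_hf_chat_template(
        tokenizer=tokenizer,
        conversation=conversation,
        renderer_config=renderer_config,
        **kwargs,
    )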

@@ -71,6 +71,7 @@ class OpenAIServingModels:
         self.input_processor = self.engine_client.input_processor
         self.io_processor = self.engine_client.io_processor
+        self.renderer_config = self.engine_client.renderer_config
         self.model_config = self.engine_client.model_config
         self.max_model_len = self.model_config.max_model_len

@@ -91,7 +91,7 @@ class OpenAISpeechToText(OpenAIServing):
         self.task_type = task_type
         self.asr_config = self.model_cls.get_speech_to_text_config(
-            self.model_config, task_type
+            self.renderer_config, task_type
         )
         self.enable_force_include_usage = enable_force_include_usage
@@ -101,8 +101,8 @@ class OpenAISpeechToText(OpenAIServing):
         self.tokenizer = cast(
             PreTrainedTokenizerBase,
             get_tokenizer(
-                tokenizer_name=self.model_config.tokenizer,
-                tokenizer_mode=self.model_config.tokenizer_mode,
+                tokenizer_name=self.renderer_config.tokenizer,
+                tokenizer_mode=self.renderer_config.tokenizer_mode,
             ),
         )
@@ -154,7 +154,7 @@ class OpenAISpeechToText(OpenAIServing):
         prompt = self.model_cls.get_generation_prompt(
             audio=chunk,
             stt_config=self.asr_config,
-            model_config=self.model_config,
+            renderer_config=self.renderer_config,
             language=language,
             task_type=self.task_type,
             request_prompt=request.prompt,
@@ -428,7 +428,7 @@ class OpenAISpeechToText(OpenAIServing):
         if res.prompt_token_ids is not None:
             num_prompt_tokens = len(res.prompt_token_ids)
             if audio_tokens := self.model_cls.get_num_audio_tokens(
-                audio_duration_s, self.asr_config, self.model_config
+                audio_duration_s, self.asr_config, self.renderer_config
             ):
                 num_prompt_tokens += audio_tokens
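
Usage accounting on the speech-to-text path works the same way as before, just
keyed off the renderer config: audio tokens estimated from the clip duration
are added on top of the text prompt tokens. A worked sketch with made-up
numbers (the token counts are illustrative, not from any real model):

# Made-up figures: suppose the model maps ~30 s of audio to 1500 audio tokens.
prompt_token_ids = [101, 2023, 2003, 102]  # 4 text prompt tokens
audio_tokens = 1500  # stand-in for get_num_audio_tokens(duration, asr, renderer)

num_prompt_tokens = len(prompt_token_ids)
if audio_tokens:  # may be falsy if the model has no audio-token estimate
    num_prompt_tokens += audio_tokens

assert num_prompt_tokens == 1504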