[Renderer] Separate out RendererConfig from ModelConfig (#30145)
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
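
For orientation before the hunks: this change splits the tokenizer- and chat-template-related settings out of ModelConfig into a new RendererConfig, and the serving layer now threads the renderer config through instead of the model config. Below is a minimal sketch of the shape that split implies, inferred only from the fields this diff touches (tokenizer, tokenizer_mode, skip_tokenizer_init, and a model_config back-reference); the dataclass layout is an assumption, not vLLM's actual definition.

from dataclasses import dataclass


@dataclass
class ModelConfig:
    """Stub standing in for vLLM's real ModelConfig."""
    max_model_len: int


@dataclass
class RendererConfig:
    """Hypothetical sketch; fields mirror the accesses in the hunks below."""
    model_config: ModelConfig      # kept reachable where the raw model config is still needed
    tokenizer: str                 # tokenizer name or path
    tokenizer_mode: str            # e.g. "auto"
    skip_tokenizer_init: bool = False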
@@ -1099,7 +1099,7 @@ async def init_app_state(
     logger.info("Supported tasks: %s", supported_tasks)

     resolved_chat_template = await process_chat_template(
-        args.chat_template, engine_client, vllm_config.model_config
+        args.chat_template, engine_client, vllm_config.renderer_config
     )

     if args.tool_server == "demo":
@@ -122,7 +122,7 @@ class OpenAIServingCompletion(OpenAIServing):
         try:
             lora_request = self._maybe_get_adapters(request)

-            if self.model_config.skip_tokenizer_init:
+            if self.renderer_config.skip_tokenizer_init:
                 tokenizer = None
             else:
                 tokenizer = await self.engine_client.get_tokenizer()
@@ -291,6 +291,7 @@ class OpenAIServing:

         self.input_processor = self.models.input_processor
         self.io_processor = self.models.io_processor
+        self.renderer_config = self.models.renderer_config
         self.model_config = self.models.model_config
         self.max_model_len = self.model_config.max_model_len

@@ -1100,18 +1101,18 @@ class OpenAIServing:
         Sequence[RequestPrompt],
         list[EngineTokensPrompt],
     ]:
-        model_config = self.model_config
+        renderer_config = self.renderer_config

         resolved_content_format = resolve_chat_template_content_format(
             chat_template,
             tool_dicts,
             chat_template_content_format,
             tokenizer,
-            model_config=model_config,
+            renderer_config=renderer_config,
         )
         conversation, mm_data_future, mm_uuids = parse_chat_messages_futures(
             messages,
-            model_config,
+            renderer_config,
             content_format=resolved_content_format,
         )

@@ -1138,14 +1139,14 @@ class OpenAIServing:
             request_prompt = tokenizer.apply_chat_template(
                 conversation=conversation,
                 messages=messages,
-                model_config=model_config,
+                model_config=renderer_config.model_config,
                 **_chat_template_kwargs,
             )
         else:
             request_prompt = apply_hf_chat_template(
                 tokenizer=tokenizer,
                 conversation=conversation,
-                model_config=model_config,
+                renderer_config=renderer_config,
                 **_chat_template_kwargs,
             )

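Note the asymmetry in the hunk above: apply_hf_chat_template is migrated to take the renderer config directly, while tokenizer.apply_chat_template still expects a model_config and reaches it through renderer_config.model_config. Continuing the sketch from the top of the diff, a hedged illustration of that passthrough (the values here are made up):

renderer_config = RendererConfig(
    model_config=ModelConfig(max_model_len=8192),
    tokenizer="some-org/some-model",   # hypothetical tokenizer name
    tokenizer_mode="auto",
)

# Callers that still need the underlying model config go through the
# back-reference rather than holding a ModelConfig themselves.
assert renderer_config.model_config.max_model_len == 8192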
@@ -71,6 +71,7 @@ class OpenAIServingModels:

         self.input_processor = self.engine_client.input_processor
         self.io_processor = self.engine_client.io_processor
+        self.renderer_config = self.engine_client.renderer_config
         self.model_config = self.engine_client.model_config
         self.max_model_len = self.model_config.max_model_len

@@ -91,7 +91,7 @@ class OpenAISpeechToText(OpenAIServing):
         self.task_type = task_type

         self.asr_config = self.model_cls.get_speech_to_text_config(
-            self.model_config, task_type
+            self.renderer_config, task_type
         )

         self.enable_force_include_usage = enable_force_include_usage
@@ -101,8 +101,8 @@ class OpenAISpeechToText(OpenAIServing):
         self.tokenizer = cast(
             PreTrainedTokenizerBase,
             get_tokenizer(
-                tokenizer_name=self.model_config.tokenizer,
-                tokenizer_mode=self.model_config.tokenizer_mode,
+                tokenizer_name=self.renderer_config.tokenizer,
+                tokenizer_mode=self.renderer_config.tokenizer_mode,
             ),
         )

@@ -154,7 +154,7 @@ class OpenAISpeechToText(OpenAIServing):
             prompt = self.model_cls.get_generation_prompt(
                 audio=chunk,
                 stt_config=self.asr_config,
-                model_config=self.model_config,
+                renderer_config=self.renderer_config,
                 language=language,
                 task_type=self.task_type,
                 request_prompt=request.prompt,
@@ -428,7 +428,7 @@ class OpenAISpeechToText(OpenAIServing):
         if res.prompt_token_ids is not None:
             num_prompt_tokens = len(res.prompt_token_ids)
             if audio_tokens := self.model_cls.get_num_audio_tokens(
-                audio_duration_s, self.asr_config, self.model_config
+                audio_duration_s, self.asr_config, self.renderer_config
            ):
                 num_prompt_tokens += audio_tokens

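Taken together, the speech-to-text hunks follow the same mechanical pattern as the chat path: every helper that only needs tokenizer-related fields now receives self.renderer_config where it previously received self.model_config. Continuing the sketch above, the tokenizer-loading shape these hunks converge on; get_tokenizer below is a local stand-in, not vLLM's real helper.

def get_tokenizer(tokenizer_name: str, tokenizer_mode: str):
    # Stand-in for vLLM's actual get_tokenizer utility.
    return (tokenizer_name, tokenizer_mode)


def load_tokenizer(renderer_config: RendererConfig):
    # Mirrors the skip_tokenizer_init gate and the get_tokenizer call above:
    # all inputs now come from the renderer config, not the model config.
    if renderer_config.skip_tokenizer_init:
        return None
    return get_tokenizer(
        tokenizer_name=renderer_config.tokenizer,
        tokenizer_mode=renderer_config.tokenizer_mode,
    )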