[Frontend] Chat template fallbacks for multimodal models (#17805)
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in:
@@ -38,6 +38,10 @@ from vllm.config import ModelConfig
|
||||
from vllm.logger import init_logger
|
||||
from vllm.multimodal import MULTIMODAL_REGISTRY, MultiModalDataDict
|
||||
from vllm.multimodal.utils import MediaConnector
|
||||
# yapf: disable
|
||||
from vllm.transformers_utils.chat_templates import (
|
||||
get_chat_template_fallback_path)
|
||||
# yapf: enable
|
||||
from vllm.transformers_utils.processor import cached_get_processor
|
||||
from vllm.transformers_utils.tokenizer import AnyTokenizer, MistralTokenizer
|
||||
|
||||
@@ -325,11 +329,10 @@ def resolve_mistral_chat_template(
|
||||
return None
|
||||
|
||||
def resolve_hf_chat_template(
|
||||
model_config: ModelConfig,
|
||||
tokenizer: Union[PreTrainedTokenizer, PreTrainedTokenizerFast],
|
||||
chat_template: Optional[str],
|
||||
tools: Optional[list[dict[str, Any]]],
|
||||
*,
|
||||
trust_remote_code: bool,
|
||||
) -> Optional[str]:
|
||||
# 1st priority: The given chat template
|
||||
if chat_template is not None:
|
||||
@@ -342,7 +345,7 @@ def resolve_hf_chat_template(
|
||||
tokenizer.name_or_path,
|
||||
processor_cls=(PreTrainedTokenizer, PreTrainedTokenizerFast,
|
||||
ProcessorMixin),
|
||||
trust_remote_code=trust_remote_code,
|
||||
trust_remote_code=model_config.trust_remote_code,
|
||||
)
|
||||
if isinstance(processor, ProcessorMixin) and \
|
||||
processor.chat_template is not None:
|
||||
@@ -358,22 +361,34 @@ def resolve_hf_chat_template(
|
||||
logger.debug("Failed to load AutoTokenizer chat template for %s",
|
||||
tokenizer.name_or_path, exc_info=True)
|
||||
|
||||
return None
|
||||
# 4th priority: Predefined fallbacks
|
||||
path = get_chat_template_fallback_path(
|
||||
model_type=model_config.hf_config.model_type,
|
||||
tokenizer_name_or_path=model_config.tokenizer,
|
||||
)
|
||||
if path is not None:
|
||||
logger.info("Loading chat template fallback for %s as there isn't one "
|
||||
"defined on HF Hub.", tokenizer.name_or_path)
|
||||
chat_template = load_chat_template(path)
|
||||
else:
|
||||
logger.debug("There is no chat template fallback for %s",
|
||||
tokenizer.name_or_path)
|
||||
|
||||
return chat_template
|
||||
|
||||
|
||||
def _resolve_chat_template_content_format(
|
||||
model_config: ModelConfig,
|
||||
chat_template: Optional[str],
|
||||
tools: Optional[list[dict[str, Any]]],
|
||||
given_format: ChatTemplateContentFormatOption,
|
||||
tokenizer: AnyTokenizer,
|
||||
*,
|
||||
trust_remote_code: bool,
|
||||
) -> _ChatTemplateContentFormat:
|
||||
if isinstance(tokenizer, (PreTrainedTokenizer, PreTrainedTokenizerFast)):
|
||||
hf_chat_template = resolve_hf_chat_template(
|
||||
model_config,
|
||||
tokenizer,
|
||||
chat_template=chat_template,
|
||||
trust_remote_code=trust_remote_code,
|
||||
tools=tools,
|
||||
)
|
||||
else:
|
||||
@@ -413,19 +428,18 @@ def _log_chat_template_content_format(
|
||||
|
||||
|
||||
def resolve_chat_template_content_format(
|
||||
model_config: ModelConfig,
|
||||
chat_template: Optional[str],
|
||||
tools: Optional[list[dict[str, Any]]],
|
||||
given_format: ChatTemplateContentFormatOption,
|
||||
tokenizer: AnyTokenizer,
|
||||
*,
|
||||
trust_remote_code: bool = False,
|
||||
) -> _ChatTemplateContentFormat:
|
||||
detected_format = _resolve_chat_template_content_format(
|
||||
model_config,
|
||||
chat_template,
|
||||
tools,
|
||||
given_format,
|
||||
tokenizer,
|
||||
trust_remote_code=trust_remote_code,
|
||||
)
|
||||
|
||||
_log_chat_template_content_format(
|
||||
@@ -1177,20 +1191,20 @@ def parse_chat_messages_futures(
|
||||
|
||||
|
||||
def apply_hf_chat_template(
|
||||
model_config: ModelConfig,
|
||||
tokenizer: Union[PreTrainedTokenizer, PreTrainedTokenizerFast],
|
||||
conversation: list[ConversationMessage],
|
||||
chat_template: Optional[str],
|
||||
tools: Optional[list[dict[str, Any]]],
|
||||
*,
|
||||
trust_remote_code: bool = False,
|
||||
tokenize: bool = False, # Different from HF's default
|
||||
**kwargs: Any,
|
||||
) -> str:
|
||||
hf_chat_template = resolve_hf_chat_template(
|
||||
model_config,
|
||||
tokenizer,
|
||||
chat_template=chat_template,
|
||||
tools=tools,
|
||||
trust_remote_code=trust_remote_code,
|
||||
)
|
||||
|
||||
if hf_chat_template is None:
|
||||
|
||||
@@ -726,11 +726,11 @@ class LLM:
|
||||
tokenizer = self.get_tokenizer(lora_request)
|
||||
model_config = self.llm_engine.get_model_config()
|
||||
resolved_content_format = resolve_chat_template_content_format(
|
||||
model_config,
|
||||
chat_template,
|
||||
tools,
|
||||
chat_template_content_format,
|
||||
tokenizer,
|
||||
trust_remote_code=model_config.trust_remote_code,
|
||||
)
|
||||
|
||||
_chat_template_kwargs: dict[str, Any] = dict(
|
||||
@@ -762,8 +762,8 @@ class LLM:
|
||||
)
|
||||
else:
|
||||
prompt_str = apply_hf_chat_template(
|
||||
model_config,
|
||||
tokenizer,
|
||||
trust_remote_code=model_config.trust_remote_code,
|
||||
conversation=conversation,
|
||||
**_chat_template_kwargs,
|
||||
)
|
||||
|
||||
@@ -937,10 +937,11 @@ async def init_app_state(
|
||||
chat_template=resolved_chat_template)
|
||||
else:
|
||||
hf_chat_template = resolve_hf_chat_template(
|
||||
vllm_config.model_config,
|
||||
tokenizer,
|
||||
chat_template=None,
|
||||
tools=None,
|
||||
trust_remote_code=model_config.trust_remote_code)
|
||||
)
|
||||
|
||||
if hf_chat_template != resolved_chat_template:
|
||||
logger.warning(
|
||||
|
||||
@@ -394,11 +394,11 @@ class OpenAIServing:
|
||||
model_config = self.model_config
|
||||
|
||||
resolved_content_format = resolve_chat_template_content_format(
|
||||
model_config,
|
||||
chat_template,
|
||||
tool_dicts,
|
||||
chat_template_content_format,
|
||||
tokenizer,
|
||||
trust_remote_code=model_config.trust_remote_code,
|
||||
)
|
||||
conversation, mm_data_future = parse_chat_messages_futures(
|
||||
messages,
|
||||
@@ -425,8 +425,8 @@ class OpenAIServing:
|
||||
)
|
||||
else:
|
||||
request_prompt = apply_hf_chat_template(
|
||||
model_config,
|
||||
tokenizer,
|
||||
trust_remote_code=model_config.trust_remote_code,
|
||||
conversation=conversation,
|
||||
**_chat_template_kwargs,
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user