[Frontend] Automatic detection of chat content format from AST (#9919)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in:
Cyrus Leung
2024-11-16 13:35:40 +08:00
committed by GitHub
parent 4f168f69a3
commit 32e46e000f
16 changed files with 788 additions and 350 deletions

View File

@@ -1,8 +1,8 @@
from typing import List, Optional, Union
from typing import Final, List, Optional, Union
from vllm.config import ModelConfig
from vllm.engine.protocol import EngineClient
from vllm.entrypoints.chat_utils import load_chat_template
from vllm.entrypoints.chat_utils import ChatTemplateContentFormatOption
from vllm.entrypoints.logger import RequestLogger
# yapf conflicts with isort for this block
# yapf: disable
@@ -33,7 +33,8 @@ class OpenAIServingTokenization(OpenAIServing):
lora_modules: Optional[List[LoRAModulePath]],
request_logger: Optional[RequestLogger],
chat_template: Optional[str],
):
chat_template_content_format: ChatTemplateContentFormatOption,
) -> None:
super().__init__(engine_client=engine_client,
model_config=model_config,
base_model_paths=base_model_paths,
@@ -41,12 +42,8 @@ class OpenAIServingTokenization(OpenAIServing):
prompt_adapters=None,
request_logger=request_logger)
# If this is None we use the tokenizer's default chat template
# the list of commonly-used chat template names for HF named templates
hf_chat_templates: List[str] = ['default', 'tool_use']
self.chat_template = chat_template \
if chat_template in hf_chat_templates \
else load_chat_template(chat_template)
self.chat_template = chat_template
self.chat_template_content_format: Final = chat_template_content_format
async def create_tokenize(
self,
@@ -75,9 +72,12 @@ class OpenAIServingTokenization(OpenAIServing):
request,
tokenizer,
request.messages,
chat_template=self.chat_template,
chat_template=request.chat_template or self.chat_template,
chat_template_content_format=self.
chat_template_content_format,
add_generation_prompt=request.add_generation_prompt,
continue_final_message=request.continue_final_message,
chat_template_kwargs=request.chat_template_kwargs,
add_special_tokens=request.add_special_tokens,
)
else: