[Frontend] Automatic detection of chat content format from AST (#9919)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in:
Cyrus Leung
2024-11-16 13:35:40 +08:00
committed by GitHub
parent 4f168f69a3
commit 32e46e000f
16 changed files with 788 additions and 350 deletions

View File

@@ -90,7 +90,6 @@ class EngineArgs:
task: TaskOption = "auto"
skip_tokenizer_init: bool = False
tokenizer_mode: str = 'auto'
chat_template_text_format: str = 'string'
trust_remote_code: bool = False
allowed_local_media_path: str = ""
download_dir: Optional[str] = None
@@ -258,14 +257,6 @@ class EngineArgs:
'fast tokenizer if available.\n* "slow" will '
'always use the slow tokenizer. \n* '
'"mistral" will always use the `mistral_common` tokenizer.')
parser.add_argument(
'--chat-template-text-format',
type=str,
default=EngineArgs.chat_template_text_format,
choices=['string', 'openai'],
help='The format to render text content within a chat template. '
'"string" will keep the content field as a string whereas '
'"openai" will parse content in the current OpenAI format.')
parser.add_argument('--trust-remote-code',
action='store_true',
help='Trust remote code from huggingface.')
@@ -894,7 +885,6 @@ class EngineArgs:
# We know this is not None because we set it in __post_init__
tokenizer=cast(str, self.tokenizer),
tokenizer_mode=self.tokenizer_mode,
chat_template_text_format=self.chat_template_text_format,
trust_remote_code=self.trust_remote_code,
allowed_local_media_path=self.allowed_local_media_path,
dtype=self.dtype,