[Model] Add Mistral Tokenization to improve robustness and chat encoding (#7739)

This commit is contained in:
Patrick von Platen
2024-08-27 14:40:02 +02:00
committed by GitHub
parent 9606c7197d
commit 6fc4e6e07a
12 changed files with 275 additions and 60 deletions

View File

@@ -267,7 +267,7 @@ def apply_chat_template(
*,
tokenize: bool = False, # Different from HF's default
**kwargs: Any,
-) -> str:
+) -> Union[str, List[int]]:
if chat_template is None and tokenizer.chat_template is None:
raise ValueError(
"As of transformers v4.44, default chat template is no longer "
@@ -280,6 +280,4 @@ def apply_chat_template(
tokenize=tokenize,
**kwargs,
)
-assert isinstance(prompt, str)
return prompt