[Model] Add Mistral Tokenization to improve robustness and chat encoding (#7739)

This commit is contained in:
Patrick von Platen
2024-08-27 14:40:02 +02:00
committed by GitHub
parent 9606c7197d
commit 6fc4e6e07a
12 changed files with 275 additions and 60 deletions

View File

@@ -61,7 +61,8 @@ class ModelConfig:
output when `served_model_name` is not specified.
tokenizer: Name or path of the huggingface tokenizer to use.
tokenizer_mode: Tokenizer mode. "auto" will use the fast tokenizer if
-        available, and "slow" will always use the slow tokenizer.
+        available, "slow" will always use the slow tokenizer, and
+        "mistral" will always use the tokenizer from `mistral_common`.
trust_remote_code: Trust remote code (e.g., from HuggingFace) when
downloading the model and tokenizer.
dtype: Data type for model weights and activations. The "auto" option
@@ -246,10 +247,10 @@ class ModelConfig:
def _verify_tokenizer_mode(self) -> None:
tokenizer_mode = self.tokenizer_mode.lower()
-        if tokenizer_mode not in ["auto", "slow"]:
+        if tokenizer_mode not in ["auto", "slow", "mistral"]:
raise ValueError(
f"Unknown tokenizer mode: {self.tokenizer_mode}. Must be "
-                "either 'auto' or 'slow'.")
+                "either 'auto', 'slow' or 'mistral'.")
self.tokenizer_mode = tokenizer_mode
def _verify_embedding_mode(self) -> None: