[mistral_common] Add v11 tokenizer (#19193)

Signed-off-by: Patrick von Platen <patrick.v.platen@gmail.com>
This commit is contained in:
Patrick von Platen
2025-06-05 17:27:41 +02:00
committed by GitHub
parent 9bc8bb07cf
commit f20f9f063b
2 changed files with 32 additions and 4 deletions

View File

@@ -187,6 +187,8 @@ class MistralTokenizer(TokenizerBase):
def __init__(self, tokenizer: "PublicMistralTokenizer") -> None:
self.mistral = tokenizer
self.instruct = tokenizer.instruct_tokenizer
_mistral_version_str = self.instruct.tokenizer.version.value
self.version: int = int(_mistral_version_str.split("v")[-1])
tokenizer_ = tokenizer.instruct_tokenizer.tokenizer
from mistral_common.tokens.tokenizers.tekken import (