[Bugfix][Frontend] Guard against bad token ids (#9634)
Signed-off-by: Joe Runde <Joseph.Runde@ibm.com>
This commit is contained in:
@@ -85,6 +85,7 @@ class MistralTokenizer:
             raise TypeError(f"Unsupported tokenizer: {type(tokenizer_)}")

         self.tokenizer = tokenizer_
+        self._max_token_id = max(self._vocab.values())

     @classmethod
     def from_pretrained(cls,
@@ -158,6 +159,10 @@ class MistralTokenizer:
     def vocab_size(self) -> int:
         return len(self._vocab)

+    @property
+    def max_token_id(self) -> int:
+        return self._max_token_id
+
     def __len__(self) -> int:
         return self.vocab_size

Reference in New Issue
Block a user