feat(frontend): early-fail tokenization guard for user requests (#31366)
Signed-off-by: limingliang <limingliang@stepfun.com>
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
Co-authored-by: limingliang <limingliang@stepfun.com>
Co-authored-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in:
@@ -272,6 +272,7 @@ class MistralTokenizer(TokenizerLike):
|
||||
# Vocab sorted by token id.
|
||||
self._vocab = self.tokenizer.vocab()
|
||||
self._max_token_id = self.vocab_size - 1
|
||||
self._max_chars_per_token = max(len(tok) for tok in self._vocab)
|
||||
|
||||
# Cache special tokens for faster access.
|
||||
self._special_token_ids = self._get_special_token_ids()
|
||||
@@ -325,6 +326,10 @@ class MistralTokenizer(TokenizerLike):
|
||||
def max_token_id(self) -> int:
|
||||
return self._max_token_id
|
||||
|
||||
@property
def max_chars_per_token(self) -> int:
    """Return the cached length of the longest vocabulary entry.

    The value is read from ``self._max_chars_per_token``, which is
    populated once from ``max(len(tok) for tok in self._vocab)`` when the
    tokenizer is set up, so this accessor does no work per call.
    """
    return self._max_chars_per_token
|
||||
|
||||
@property
def truncation_side(self) -> str:
    """Return the truncation side of the wrapped transformers tokenizer.

    This is a pure pass-through to
    ``self.transformers_tokenizer.truncation_side``; no value is cached or
    validated here.
    """
    return self.transformers_tokenizer.truncation_side
|
||||
|
||||
Reference in New Issue
Block a user