feat(frontend): early-fail tokenization guard for user requests (#31366)

Signed-off-by: limingliang <limingliang@stepfun.com>
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
Co-authored-by: limingliang <limingliang@stepfun.com>
Co-authored-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in:
Mingliang Li
2026-02-06 11:38:02 +08:00
committed by GitHub
parent 20d7454c9b
commit a32cb49b60
7 changed files with 315 additions and 209 deletions

View File

@@ -272,6 +272,7 @@ class MistralTokenizer(TokenizerLike):
# Vocab sorted by token id.
self._vocab = self.tokenizer.vocab()
self._max_token_id = self.vocab_size - 1
self._max_chars_per_token = max(len(tok) for tok in self._vocab)
# Cache special tokens for faster access.
self._special_token_ids = self._get_special_token_ids()
@@ -325,6 +326,10 @@ class MistralTokenizer(TokenizerLike):
# Accessor for the cached `self._max_token_id`; elsewhere in this class that
# attribute is assigned `self.vocab_size - 1`, i.e. the largest valid token id.
def max_token_id(self) -> int:
return self._max_token_id
# Read-only accessor for the cached `self._max_chars_per_token`, which is
# computed once as the largest `len(tok)` over the entries of `self._vocab`.
# NOTE(review): presumably used as an upper bound on how much text a single
# token can cover (for the early-fail length guard) — confirm against callers.
@property
def max_chars_per_token(self) -> int:
return self._max_chars_per_token
# Read-only accessor that forwards to the `truncation_side` attribute of the
# wrapped `self.transformers_tokenizer`; no value is cached on this object.
@property
def truncation_side(self) -> str:
return self.transformers_tokenizer.truncation_side