[Bugfix] Ensure special tokens are properly filtered out for guided structured output with MistralTokenizer (#10363)
Signed-off-by: Guillaume Calmettes <gcalmettes@scaleway.com>
This commit is contained in:
committed by
GitHub
parent
3a763ba0c3
commit
691a3ec047
@@ -174,18 +174,29 @@ class MistralTokenizer:
|
||||
revision=revision)
|
||||
return tokenizer_file
|
||||
|
||||
# the following attributes are set to fit VLLM's design
|
||||
# the following attributes are set to fit VLLM's design and are used
|
||||
# by the guided structured output backends.
|
||||
@property
|
||||
def all_special_tokens_extended(self) -> List[str]:
|
||||
return []
|
||||
# tekken defines its own extended special tokens list
|
||||
if hasattr(self.tokenizer, "SPECIAL_TOKENS"):
|
||||
special_tokens = self.tokenizer.SPECIAL_TOKENS
|
||||
else:
|
||||
special_tokens = list(SpecialTokens)
|
||||
return [
|
||||
s.value if isinstance(s, SpecialTokens) else s
|
||||
for s in special_tokens
|
||||
]
|
||||
|
||||
@property
|
||||
def all_special_tokens(self) -> List[str]:
|
||||
return []
|
||||
return self.all_special_tokens_extended
|
||||
|
||||
@property
|
||||
def all_special_ids(self) -> List[int]:
|
||||
return []
|
||||
return [
|
||||
self.all_special_tokens.index(t) for t in self.all_special_tokens
|
||||
]
|
||||
|
||||
@property
|
||||
def bos_token_id(self) -> int:
|
||||
|
||||
Reference in New Issue
Block a user