[Misc] Unify tokenizer registration (#29767)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in:
Cyrus Leung
2025-12-01 19:34:58 +08:00
committed by GitHub
parent 86e178f7c4
commit f0a28bf661
14 changed files with 237 additions and 183 deletions

View File

@@ -6,6 +6,7 @@ from typing import TYPE_CHECKING, Any, cast
from vllm.logger import init_logger
from .protocol import TokenizerLike
from .registry import TokenizerRegistry
if TYPE_CHECKING:
from mistral_common.protocol.instruct.request import (
@@ -165,6 +166,7 @@ def _tekken_token_to_id(tokenizer: "Tekkenizer", t: str | bytes) -> int:
return tokenizer.unk_id
@TokenizerRegistry.register("mistral")
class MistralTokenizer(TokenizerLike):
@classmethod
def from_pretrained(