[Refactor] TokenizerRegistry only uses lazy imports (#30609)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in:
Cyrus Leung
2025-12-13 23:16:22 +08:00
committed by GitHub
parent ace34e3783
commit 39cefbdf17
14 changed files with 201 additions and 175 deletions

View File

@@ -3,10 +3,11 @@
from pathlib import Path
from typing import TYPE_CHECKING, Any, cast
from vllm.entrypoints.chat_utils import ChatCompletionMessageParam
from vllm.entrypoints.openai.protocol import ChatCompletionRequest
from vllm.logger import init_logger
from .protocol import TokenizerLike
from .registry import TokenizerRegistry
if TYPE_CHECKING:
from mistral_common.protocol.instruct.request import (
@@ -15,9 +16,6 @@ if TYPE_CHECKING:
from mistral_common.tokens.tokenizers.tekken import Tekkenizer
from transformers import BatchEncoding
from vllm.entrypoints.chat_utils import ChatCompletionMessageParam
from vllm.entrypoints.openai.protocol import ChatCompletionRequest
try:
# Transformers v5
from transformers.tokenization_mistral_common import MistralCommonBackend
@@ -201,7 +199,6 @@ def _tekken_token_to_id(tokenizer: "Tekkenizer", t: str | bytes) -> int:
return tokenizer.unk_id
@TokenizerRegistry.register("mistral")
class MistralTokenizer(TokenizerLike):
@classmethod
def from_pretrained(