[Misc] Refactor tokenizer interface (#29693)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in:
Cyrus Leung
2025-11-29 20:02:21 +08:00
committed by GitHub
parent f223ed4181
commit 34a984274e
119 changed files with 752 additions and 821 deletions

View File

@@ -9,7 +9,7 @@ import torch
from transformers.models.auto.auto_factory import _BaseAutoModelClass
from vllm.config.model import RunnerOption
from vllm.transformers_utils.tokenizer import AnyTokenizer
from vllm.tokenizers import TokenizerLike
from .....conftest import HfRunner, VllmRunner
from ....registry import HF_EXAMPLE_MODELS
@@ -33,7 +33,7 @@ def run_test(
auto_cls: type[_BaseAutoModelClass],
use_tokenizer_eos: bool,
comparator: Callable[..., None],
get_stop_token_ids: Callable[[AnyTokenizer], list[int]] | None,
get_stop_token_ids: Callable[[TokenizerLike], list[int]] | None,
stop_str: list[str] | None,
limit_mm_per_prompt: dict[str, int],
vllm_runner_kwargs: dict[str, Any] | None,

View File

@@ -14,7 +14,7 @@ from transformers.models.auto.auto_factory import _BaseAutoModelClass
from vllm.config.model import RunnerOption
from vllm.logprobs import SampleLogprobs
from vllm.transformers_utils.tokenizer import AnyTokenizer
from vllm.tokenizers import TokenizerLike
from .....conftest import (
AUDIO_ASSETS,
@@ -126,7 +126,7 @@ class VLMTestInfo(NamedTuple):
vllm_runner_kwargs: dict[str, Any] | None = None
# Optional callable which gets a list of token IDs from the model tokenizer
get_stop_token_ids: Callable[[AnyTokenizer], list[int]] | None = None
get_stop_token_ids: Callable[[TokenizerLike], list[int]] | None = None
# Optional list of strings to stop generation, useful when stop tokens are
# not special tokens in the tokenizer
stop_str: list[str] | None = None