[Chore] Use tokenizer.encode and tokenizer.decode directly (#29851)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
Authored by Cyrus Leung on 2025-12-02 20:30:40 +08:00, committed by GitHub
parent 951445a52d
commit 68ffbca7e4
9 changed files with 36 additions and 40 deletions
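
In short: call sites that previously went through the module-level encode_tokens and
decode_tokens helpers in vllm.transformers_utils.tokenizer now call the tokenizer's own
methods. A minimal sketch of the pattern, assuming tokenizer is any TokenizerLike
instance (the concrete values are illustrative):

    # Before: module-level helpers wrapped the tokenizer object
    token_ids = encode_tokens(tokenizer, "Hello world", add_special_tokens=False)
    text = decode_tokens(tokenizer, token_ids, skip_special_tokens=True)

    # After: call the TokenizerLike methods directly
    token_ids = tokenizer.encode("Hello world", add_special_tokens=False)
    text = tokenizer.decode(token_ids, skip_special_tokens=True)

The old helpers remain importable but are marked deprecated (see the last file in this
diff).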

View File

@@ -22,8 +22,11 @@ from vllm.multimodal import MULTIMODAL_REGISTRY, MultiModalDataDict
 from vllm.multimodal.cache import MultiModalProcessorOnlyCache
 from vllm.multimodal.inputs import MultiModalInputs
 from vllm.multimodal.processing import BaseMultiModalProcessor, InputProcessingContext
-from vllm.tokenizers import MistralTokenizer, cached_tokenizer_from_config
-from vllm.transformers_utils.tokenizer import encode_tokens
+from vllm.tokenizers import (
+    MistralTokenizer,
+    TokenizerLike,
+    cached_tokenizer_from_config,
+)

 from ....multimodal.utils import random_audio, random_image, random_video
 from ...registry import (
@@ -151,7 +154,7 @@ def get_text_token_prompts(
     mm_data: MultiModalDataDict,
 ):
     dummy_inputs = processor.dummy_inputs
-    tokenizer = processor.info.get_tokenizer()
+    tokenizer: TokenizerLike = processor.info.get_tokenizer()

     model_config = processor.info.ctx.model_config
     model_type = model_config.hf_config.model_type
@@ -188,10 +191,9 @@
     assert isinstance(inputs.prompt, str)

     text_prompt = inputs.prompt
-    token_prompt = encode_tokens(
-        tokenizer,
-        text_prompt,
-        add_special_tokens=_ADD_SPECIAL_TOKENS_OVERRIDES.get(model_type),
-    )
+    token_prompt = tokenizer.encode(
+        text_prompt,
+        add_special_tokens=_ADD_SPECIAL_TOKENS_OVERRIDES.get(model_type, True),
+    )

     return text_prompt, token_prompt
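
Note the new default in the hunk above: dict.get(key) returns None for a missing key, and
tokenizer.encode expects a real bool for add_special_tokens, so the lookup now supplies an
explicit True. A quick illustration (the table contents here are hypothetical):

    # Hypothetical override table mapping model_type -> add_special_tokens
    _ADD_SPECIAL_TOKENS_OVERRIDES = {"some_model_type": False}

    assert _ADD_SPECIAL_TOKENS_OVERRIDES.get("unlisted_model") is None        # old lookup
    assert _ADD_SPECIAL_TOKENS_OVERRIDES.get("unlisted_model", True) is True  # new lookup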

View File

@@ -5,7 +5,6 @@
 import pytest

 from vllm.multimodal import MULTIMODAL_REGISTRY
-from vllm.transformers_utils.tokenizer import encode_tokens

 from ....conftest import ImageTestAssets
 from ...utils import build_model_context
@@ -48,7 +47,7 @@ def test_processor_override(
         ]
     }
     if tokenized_prompt:
-        prompt = encode_tokens(tokenizer, prompt)
+        prompt = tokenizer.encode(prompt)

     processed_inputs = processor.apply(prompt, mm_data, mm_processor_kwargs)
     mm_data = processed_inputs["mm_kwargs"].get_data()

View File

@@ -37,7 +37,7 @@ from vllm.inputs.data import PromptType
 from vllm.logger import init_logger
 from vllm.model_executor.models import SupportsTranscription
 from vllm.outputs import RequestOutput
-from vllm.transformers_utils.tokenizer import get_tokenizer
+from vllm.tokenizers import get_tokenizer
 from vllm.utils.import_utils import PlaceholderModule

 try:

View File

@@ -33,7 +33,7 @@ class RenderConfig:
     `0` yields an empty list (and skips embeds).
     `-1` maps to `model_config.max_model_len`."""

-    add_special_tokens: bool | None = True
+    add_special_tokens: bool = True
     """Whether to add model-specific special tokens during tokenization."""

     cache_salt: str | None = None
@@ -315,7 +315,7 @@ class CompletionRenderer(BaseRenderer):
         text: str,
         max_length: int | None,
         truncate_prompt_tokens: int | None,
-        add_special_tokens: bool | None,
+        add_special_tokens: bool,
         cache_salt: str | None,
     ) -> EngineTokensPrompt:
         """Tokenize text input asynchronously."""

View File

@@ -19,7 +19,7 @@ from vllm.inputs import TokensPrompt
 from vllm.model_executor.models.interfaces import supports_score_template
 from vllm.multimodal.inputs import MultiModalDataDict
 from vllm.outputs import PoolingRequestOutput
-from vllm.transformers_utils.tokenizer import TokenizerLike
+from vllm.tokenizers import TokenizerLike

 ScoreContentPartParam: TypeAlias = (
     ChatCompletionContentPartImageParam | ChatCompletionContentPartImageEmbedsParam

View File

@@ -75,7 +75,6 @@ from vllm.multimodal.profiling import BaseDummyInputsBuilder
 from vllm.sequence import IntermediateTensors
 from vllm.tokenizers import TokenizerLike, cached_tokenizer_from_config
 from vllm.transformers_utils.configs.radio import RadioConfig
-from vllm.transformers_utils.tokenizer import encode_tokens
 from vllm.utils.tensor_schema import TensorSchema, TensorShape

 from .utils import _merge_multimodal_embeddings
@@ -454,14 +453,12 @@ class NanoNemotronVLProcessor(BaseNanoNemotronVLProcessor):

         # Pre-tokenize special tokens for video processing
         # to avoid repeated tokenization
-        self._img_start_token_ids = encode_tokens(
-            tokenizer, IMG_START, add_special_tokens=False
-        )
-        self._img_end_token_ids = encode_tokens(
-            tokenizer, IMG_END, add_special_tokens=False
-        )
-        self._img_context_token_ids = encode_tokens(
-            tokenizer, IMG_CONTEXT, add_special_tokens=False
-        )
+        self._img_start_token_ids = tokenizer.encode(
+            IMG_START, add_special_tokens=False
+        )
+        self._img_end_token_ids = tokenizer.encode(IMG_END, add_special_tokens=False)
+        self._img_context_token_ids = tokenizer.encode(
+            IMG_CONTEXT, add_special_tokens=False
+        )

     @property
@@ -1179,14 +1176,12 @@ class NemotronH_Nano_VL_V2(
         # Pre-tokenize special tokens for video processing
         # to avoid repeated tokenization
         tokenizer = cached_tokenizer_from_config(vllm_config.model_config)
-        self._img_start_token_ids = encode_tokens(
-            tokenizer, IMG_START, add_special_tokens=False
-        )
-        self._img_end_token_ids = encode_tokens(
-            tokenizer, IMG_END, add_special_tokens=False
-        )
-        self._img_context_token_ids = encode_tokens(
-            tokenizer, IMG_CONTEXT, add_special_tokens=False
-        )
+        self._img_start_token_ids = tokenizer.encode(
+            IMG_START, add_special_tokens=False
+        )
+        self._img_end_token_ids = tokenizer.encode(IMG_END, add_special_tokens=False)
+        self._img_context_token_ids = tokenizer.encode(
+            IMG_CONTEXT, add_special_tokens=False
+        )

     def pixel_shuffle(self, x, scale_factor=0.5):
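
Two details in these hunks are worth noting: the marker ids are computed once in the
constructor so per-request processing can reuse them, and every call passes
add_special_tokens=False because these strings are fragments spliced into a larger token
sequence. With the default of True, many tokenizers would prepend BOS (and possibly append
EOS) to each fragment, corrupting the composed prompt. A sketch, where tokenizer is any
TokenizerLike and the ids are purely illustrative:

    ids_with_specials = tokenizer.encode(IMG_START)
    # e.g. [1, 523, 2492, 29958] - a BOS id leaks into the fragment

    ids_fragment = tokenizer.encode(IMG_START, add_special_tokens=False)
    # e.g. [523, 2492, 29958] - safe to splice mid-sequence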

View File

@@ -88,7 +88,6 @@ from vllm.multimodal.processing import (
 )
 from vllm.multimodal.profiling import BaseDummyInputsBuilder
 from vllm.sequence import IntermediateTensors
-from vllm.transformers_utils.tokenizer import encode_tokens
 from vllm.utils.tensor_schema import TensorSchema, TensorShape

 from .interfaces import (
@@ -591,7 +590,7 @@ class Qwen2_5OmniThinkerMultiModalProcessor(
                 tokenization_kwargs=tokenization_kwargs,
             )

             tokenizer = self.info.get_tokenizer()
-            prompt_ids = encode_tokens(tokenizer, prompt)
+            prompt_ids = tokenizer.encode(prompt)
         else:
             prompt_ids = self._apply_hf_processor_tokens_only(prompt)

View File

@@ -25,7 +25,6 @@ from typing_extensions import TypeVar, assert_never
 from vllm.logger import init_logger
 from vllm.tokenizers import TokenizerLike
 from vllm.transformers_utils.processor import cached_processor_from_config
-from vllm.transformers_utils.tokenizer import decode_tokens, encode_tokens
 from vllm.utils.collection_utils import flatten_2d_lists, full_groupby
 from vllm.utils.func_utils import get_allowed_kwarg_only_overrides
 from vllm.utils.jsontree import JSONTree, json_map_leaves
@@ -80,9 +79,9 @@ def _cached_encode(
     tokenizer: TokenizerLike,
     text: str,
     *,
-    add_special_tokens: bool | None = None,
+    add_special_tokens: bool = True,
 ) -> list[int]:
-    return encode_tokens(tokenizer, text, add_special_tokens=add_special_tokens)
+    return tokenizer.encode(text, add_special_tokens=add_special_tokens)


 @lru_cache(maxsize=2048)
@@ -90,11 +89,9 @@ def _cached_decode(
     tokenizer: TokenizerLike,
     token_ids: tuple[int, ...],
     *,
-    skip_special_tokens: bool | None = None,
+    skip_special_tokens: bool = False,
 ) -> str:
-    return decode_tokens(
-        tokenizer, list(token_ids), skip_special_tokens=skip_special_tokens
-    )
+    return tokenizer.decode(list(token_ids), skip_special_tokens=skip_special_tokens)


 def _seq2text(
@@ -110,7 +107,7 @@ def _seq2text(
         raise ValueError("You cannot decode tokens when `skip_tokenizer_init=True`")

     if not use_cache:
-        return decode_tokens(tokenizer, seq)
+        return tokenizer.decode(seq)

     return _cached_decode(tokenizer, tuple(seq))

@@ -126,7 +123,7 @@ def _seq2tokens(
         raise ValueError("You cannot encode text when `skip_tokenizer_init=True`")

     if not use_cache:
-        return encode_tokens(tokenizer, seq, add_special_tokens=False)
+        return tokenizer.encode(seq, add_special_tokens=False)

     return _cached_encode(tokenizer, seq, add_special_tokens=False)

@@ -2198,8 +2195,8 @@ class EncDecMultiModalProcessor(BaseMultiModalProcessor[_I]):
         tokenizer = self.info.get_tokenizer()
         decoder_prompt_raw = self.create_decoder_prompt(prompt, mm_data)
         if isinstance(decoder_prompt_raw, str):
-            decoder_prompt_ids = encode_tokens(
-                tokenizer, decoder_prompt_raw, add_special_tokens=False
-            )
+            decoder_prompt_ids = tokenizer.encode(
+                decoder_prompt_raw, add_special_tokens=False
+            )
         else:
             decoder_prompt_ids = decoder_prompt_raw
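
The cached helpers above explain the tuple/list shuffling: functools.lru_cache hashes its
arguments, a list is unhashable, so _seq2text converts with tuple(seq) on the way in and
_cached_decode converts back with list(token_ids) before decoding. The same pattern,
reduced to a self-contained sketch:

    from functools import lru_cache

    @lru_cache(maxsize=2048)
    def cached_decode(tokenizer, token_ids: tuple[int, ...]) -> str:
        # The cache key must be hashable, hence the tuple parameter;
        # tokenizers accept any sequence, so convert back to a list here.
        return tokenizer.decode(list(token_ids))

    # Call site: cached_decode(tokenizer, tuple(seq))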

View File

@@ -4,6 +4,8 @@
 import warnings
 from typing import Any

+from typing_extensions import deprecated
+
 from vllm.logger import init_logger
 from vllm.tokenizers import TokenizerLike

@@ -73,6 +75,7 @@
     raise AttributeError(f"module {__name__!r} has no attribute {name!r}")


+@deprecated("Will be removed in v0.13. Please use `tokenizer.decode()` instead.")
 def decode_tokens(
     tokenizer: TokenizerLike,
     token_ids: list[int],
@@ -94,6 +97,7 @@ def decode_tokens(
     return tokenizer.decode(token_ids, **kw_args)


+@deprecated("Will be removed in v0.13. Please use `tokenizer.encode()` instead.")
 def encode_tokens(
     tokenizer: TokenizerLike,
     text: str,
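
For reference, typing_extensions.deprecated (PEP 702) covers both runtime and static
analysis: calling the decorated function emits a DeprecationWarning, and PEP 702-aware
type checkers flag call sites. A self-contained sketch of the shim pattern used above
(the signature is simplified relative to the real helper):

    from typing_extensions import deprecated

    @deprecated("Will be removed in v0.13. Please use `tokenizer.encode()` instead.")
    def encode_tokens(tokenizer, text, *, add_special_tokens=True):
        # Thin backward-compatibility shim; new code should call
        # tokenizer.encode(...) directly.
        return tokenizer.encode(text, add_special_tokens=add_special_tokens)

    # encode_tokens(tokenizer, "hi") still works, but warns at runtime.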