[Chore] Use tokenizer.encode and tokenizer.decode directly (#29851)
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in:
@@ -22,8 +22,11 @@ from vllm.multimodal import MULTIMODAL_REGISTRY, MultiModalDataDict
|
|||||||
from vllm.multimodal.cache import MultiModalProcessorOnlyCache
|
from vllm.multimodal.cache import MultiModalProcessorOnlyCache
|
||||||
from vllm.multimodal.inputs import MultiModalInputs
|
from vllm.multimodal.inputs import MultiModalInputs
|
||||||
from vllm.multimodal.processing import BaseMultiModalProcessor, InputProcessingContext
|
from vllm.multimodal.processing import BaseMultiModalProcessor, InputProcessingContext
|
||||||
from vllm.tokenizers import MistralTokenizer, cached_tokenizer_from_config
|
from vllm.tokenizers import (
|
||||||
from vllm.transformers_utils.tokenizer import encode_tokens
|
MistralTokenizer,
|
||||||
|
TokenizerLike,
|
||||||
|
cached_tokenizer_from_config,
|
||||||
|
)
|
||||||
|
|
||||||
from ....multimodal.utils import random_audio, random_image, random_video
|
from ....multimodal.utils import random_audio, random_image, random_video
|
||||||
from ...registry import (
|
from ...registry import (
|
||||||
@@ -151,7 +154,7 @@ def get_text_token_prompts(
|
|||||||
mm_data: MultiModalDataDict,
|
mm_data: MultiModalDataDict,
|
||||||
):
|
):
|
||||||
dummy_inputs = processor.dummy_inputs
|
dummy_inputs = processor.dummy_inputs
|
||||||
tokenizer = processor.info.get_tokenizer()
|
tokenizer: TokenizerLike = processor.info.get_tokenizer()
|
||||||
model_config = processor.info.ctx.model_config
|
model_config = processor.info.ctx.model_config
|
||||||
|
|
||||||
model_type = model_config.hf_config.model_type
|
model_type = model_config.hf_config.model_type
|
||||||
@@ -188,10 +191,9 @@ def get_text_token_prompts(
|
|||||||
assert isinstance(inputs.prompt, str)
|
assert isinstance(inputs.prompt, str)
|
||||||
|
|
||||||
text_prompt = inputs.prompt
|
text_prompt = inputs.prompt
|
||||||
token_prompt = encode_tokens(
|
token_prompt = tokenizer.encode(
|
||||||
tokenizer,
|
|
||||||
text_prompt,
|
text_prompt,
|
||||||
add_special_tokens=_ADD_SPECIAL_TOKENS_OVERRIDES.get(model_type),
|
add_special_tokens=_ADD_SPECIAL_TOKENS_OVERRIDES.get(model_type, True),
|
||||||
)
|
)
|
||||||
|
|
||||||
return text_prompt, token_prompt
|
return text_prompt, token_prompt
|
||||||
|
|||||||
@@ -5,7 +5,6 @@
|
|||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
from vllm.multimodal import MULTIMODAL_REGISTRY
|
from vllm.multimodal import MULTIMODAL_REGISTRY
|
||||||
from vllm.transformers_utils.tokenizer import encode_tokens
|
|
||||||
|
|
||||||
from ....conftest import ImageTestAssets
|
from ....conftest import ImageTestAssets
|
||||||
from ...utils import build_model_context
|
from ...utils import build_model_context
|
||||||
@@ -48,7 +47,7 @@ def test_processor_override(
|
|||||||
]
|
]
|
||||||
}
|
}
|
||||||
if tokenized_prompt:
|
if tokenized_prompt:
|
||||||
prompt = encode_tokens(tokenizer, prompt)
|
prompt = tokenizer.encode(prompt)
|
||||||
|
|
||||||
processed_inputs = processor.apply(prompt, mm_data, mm_processor_kwargs)
|
processed_inputs = processor.apply(prompt, mm_data, mm_processor_kwargs)
|
||||||
mm_data = processed_inputs["mm_kwargs"].get_data()
|
mm_data = processed_inputs["mm_kwargs"].get_data()
|
||||||
|
|||||||
@@ -37,7 +37,7 @@ from vllm.inputs.data import PromptType
|
|||||||
from vllm.logger import init_logger
|
from vllm.logger import init_logger
|
||||||
from vllm.model_executor.models import SupportsTranscription
|
from vllm.model_executor.models import SupportsTranscription
|
||||||
from vllm.outputs import RequestOutput
|
from vllm.outputs import RequestOutput
|
||||||
from vllm.transformers_utils.tokenizer import get_tokenizer
|
from vllm.tokenizers import get_tokenizer
|
||||||
from vllm.utils.import_utils import PlaceholderModule
|
from vllm.utils.import_utils import PlaceholderModule
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
|||||||
@@ -33,7 +33,7 @@ class RenderConfig:
|
|||||||
`0` yields an empty list (and skips embeds).
|
`0` yields an empty list (and skips embeds).
|
||||||
`-1` maps to `model_config.max_model_len`."""
|
`-1` maps to `model_config.max_model_len`."""
|
||||||
|
|
||||||
add_special_tokens: bool | None = True
|
add_special_tokens: bool = True
|
||||||
"""Whether to add model-specific special tokens during tokenization."""
|
"""Whether to add model-specific special tokens during tokenization."""
|
||||||
|
|
||||||
cache_salt: str | None = None
|
cache_salt: str | None = None
|
||||||
@@ -315,7 +315,7 @@ class CompletionRenderer(BaseRenderer):
|
|||||||
text: str,
|
text: str,
|
||||||
max_length: int | None,
|
max_length: int | None,
|
||||||
truncate_prompt_tokens: int | None,
|
truncate_prompt_tokens: int | None,
|
||||||
add_special_tokens: bool | None,
|
add_special_tokens: bool,
|
||||||
cache_salt: str | None,
|
cache_salt: str | None,
|
||||||
) -> EngineTokensPrompt:
|
) -> EngineTokensPrompt:
|
||||||
"""Tokenize text input asynchronously."""
|
"""Tokenize text input asynchronously."""
|
||||||
|
|||||||
@@ -19,7 +19,7 @@ from vllm.inputs import TokensPrompt
|
|||||||
from vllm.model_executor.models.interfaces import supports_score_template
|
from vllm.model_executor.models.interfaces import supports_score_template
|
||||||
from vllm.multimodal.inputs import MultiModalDataDict
|
from vllm.multimodal.inputs import MultiModalDataDict
|
||||||
from vllm.outputs import PoolingRequestOutput
|
from vllm.outputs import PoolingRequestOutput
|
||||||
from vllm.transformers_utils.tokenizer import TokenizerLike
|
from vllm.tokenizers import TokenizerLike
|
||||||
|
|
||||||
ScoreContentPartParam: TypeAlias = (
|
ScoreContentPartParam: TypeAlias = (
|
||||||
ChatCompletionContentPartImageParam | ChatCompletionContentPartImageEmbedsParam
|
ChatCompletionContentPartImageParam | ChatCompletionContentPartImageEmbedsParam
|
||||||
|
|||||||
@@ -75,7 +75,6 @@ from vllm.multimodal.profiling import BaseDummyInputsBuilder
|
|||||||
from vllm.sequence import IntermediateTensors
|
from vllm.sequence import IntermediateTensors
|
||||||
from vllm.tokenizers import TokenizerLike, cached_tokenizer_from_config
|
from vllm.tokenizers import TokenizerLike, cached_tokenizer_from_config
|
||||||
from vllm.transformers_utils.configs.radio import RadioConfig
|
from vllm.transformers_utils.configs.radio import RadioConfig
|
||||||
from vllm.transformers_utils.tokenizer import encode_tokens
|
|
||||||
from vllm.utils.tensor_schema import TensorSchema, TensorShape
|
from vllm.utils.tensor_schema import TensorSchema, TensorShape
|
||||||
|
|
||||||
from .utils import _merge_multimodal_embeddings
|
from .utils import _merge_multimodal_embeddings
|
||||||
@@ -454,14 +453,12 @@ class NanoNemotronVLProcessor(BaseNanoNemotronVLProcessor):
|
|||||||
|
|
||||||
# Pre-tokenize special tokens for video processing
|
# Pre-tokenize special tokens for video processing
|
||||||
# to avoid repeated tokenization
|
# to avoid repeated tokenization
|
||||||
self._img_start_token_ids = encode_tokens(
|
self._img_start_token_ids = tokenizer.encode(
|
||||||
tokenizer, IMG_START, add_special_tokens=False
|
IMG_START, add_special_tokens=False
|
||||||
)
|
)
|
||||||
self._img_end_token_ids = encode_tokens(
|
self._img_end_token_ids = tokenizer.encode(IMG_END, add_special_tokens=False)
|
||||||
tokenizer, IMG_END, add_special_tokens=False
|
self._img_context_token_ids = tokenizer.encode(
|
||||||
)
|
IMG_CONTEXT, add_special_tokens=False
|
||||||
self._img_context_token_ids = encode_tokens(
|
|
||||||
tokenizer, IMG_CONTEXT, add_special_tokens=False
|
|
||||||
)
|
)
|
||||||
|
|
||||||
@property
|
@property
|
||||||
@@ -1179,14 +1176,12 @@ class NemotronH_Nano_VL_V2(
|
|||||||
# Pre-tokenize special tokens for video processing
|
# Pre-tokenize special tokens for video processing
|
||||||
# to avoid repeated tokenization
|
# to avoid repeated tokenization
|
||||||
tokenizer = cached_tokenizer_from_config(vllm_config.model_config)
|
tokenizer = cached_tokenizer_from_config(vllm_config.model_config)
|
||||||
self._img_start_token_ids = encode_tokens(
|
self._img_start_token_ids = tokenizer.encode(
|
||||||
tokenizer, IMG_START, add_special_tokens=False
|
IMG_START, add_special_tokens=False
|
||||||
)
|
)
|
||||||
self._img_end_token_ids = encode_tokens(
|
self._img_end_token_ids = tokenizer.encode(IMG_END, add_special_tokens=False)
|
||||||
tokenizer, IMG_END, add_special_tokens=False
|
self._img_context_token_ids = tokenizer.encode(
|
||||||
)
|
IMG_CONTEXT, add_special_tokens=False
|
||||||
self._img_context_token_ids = encode_tokens(
|
|
||||||
tokenizer, IMG_CONTEXT, add_special_tokens=False
|
|
||||||
)
|
)
|
||||||
|
|
||||||
def pixel_shuffle(self, x, scale_factor=0.5):
|
def pixel_shuffle(self, x, scale_factor=0.5):
|
||||||
|
|||||||
@@ -88,7 +88,6 @@ from vllm.multimodal.processing import (
|
|||||||
)
|
)
|
||||||
from vllm.multimodal.profiling import BaseDummyInputsBuilder
|
from vllm.multimodal.profiling import BaseDummyInputsBuilder
|
||||||
from vllm.sequence import IntermediateTensors
|
from vllm.sequence import IntermediateTensors
|
||||||
from vllm.transformers_utils.tokenizer import encode_tokens
|
|
||||||
from vllm.utils.tensor_schema import TensorSchema, TensorShape
|
from vllm.utils.tensor_schema import TensorSchema, TensorShape
|
||||||
|
|
||||||
from .interfaces import (
|
from .interfaces import (
|
||||||
@@ -591,7 +590,7 @@ class Qwen2_5OmniThinkerMultiModalProcessor(
|
|||||||
tokenization_kwargs=tokenization_kwargs,
|
tokenization_kwargs=tokenization_kwargs,
|
||||||
)
|
)
|
||||||
tokenizer = self.info.get_tokenizer()
|
tokenizer = self.info.get_tokenizer()
|
||||||
prompt_ids = encode_tokens(tokenizer, prompt)
|
prompt_ids = tokenizer.encode(prompt)
|
||||||
else:
|
else:
|
||||||
prompt_ids = self._apply_hf_processor_tokens_only(prompt)
|
prompt_ids = self._apply_hf_processor_tokens_only(prompt)
|
||||||
|
|
||||||
|
|||||||
@@ -25,7 +25,6 @@ from typing_extensions import TypeVar, assert_never
|
|||||||
from vllm.logger import init_logger
|
from vllm.logger import init_logger
|
||||||
from vllm.tokenizers import TokenizerLike
|
from vllm.tokenizers import TokenizerLike
|
||||||
from vllm.transformers_utils.processor import cached_processor_from_config
|
from vllm.transformers_utils.processor import cached_processor_from_config
|
||||||
from vllm.transformers_utils.tokenizer import decode_tokens, encode_tokens
|
|
||||||
from vllm.utils.collection_utils import flatten_2d_lists, full_groupby
|
from vllm.utils.collection_utils import flatten_2d_lists, full_groupby
|
||||||
from vllm.utils.func_utils import get_allowed_kwarg_only_overrides
|
from vllm.utils.func_utils import get_allowed_kwarg_only_overrides
|
||||||
from vllm.utils.jsontree import JSONTree, json_map_leaves
|
from vllm.utils.jsontree import JSONTree, json_map_leaves
|
||||||
@@ -80,9 +79,9 @@ def _cached_encode(
|
|||||||
tokenizer: TokenizerLike,
|
tokenizer: TokenizerLike,
|
||||||
text: str,
|
text: str,
|
||||||
*,
|
*,
|
||||||
add_special_tokens: bool | None = None,
|
add_special_tokens: bool = True,
|
||||||
) -> list[int]:
|
) -> list[int]:
|
||||||
return encode_tokens(tokenizer, text, add_special_tokens=add_special_tokens)
|
return tokenizer.encode(text, add_special_tokens=add_special_tokens)
|
||||||
|
|
||||||
|
|
||||||
@lru_cache(maxsize=2048)
|
@lru_cache(maxsize=2048)
|
||||||
@@ -90,11 +89,9 @@ def _cached_decode(
|
|||||||
tokenizer: TokenizerLike,
|
tokenizer: TokenizerLike,
|
||||||
token_ids: tuple[int, ...],
|
token_ids: tuple[int, ...],
|
||||||
*,
|
*,
|
||||||
skip_special_tokens: bool | None = None,
|
skip_special_tokens: bool = False,
|
||||||
) -> str:
|
) -> str:
|
||||||
return decode_tokens(
|
return tokenizer.decode(list(token_ids), skip_special_tokens=skip_special_tokens)
|
||||||
tokenizer, list(token_ids), skip_special_tokens=skip_special_tokens
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def _seq2text(
|
def _seq2text(
|
||||||
@@ -110,7 +107,7 @@ def _seq2text(
|
|||||||
raise ValueError("You cannot decode tokens when `skip_tokenizer_init=True`")
|
raise ValueError("You cannot decode tokens when `skip_tokenizer_init=True`")
|
||||||
|
|
||||||
if not use_cache:
|
if not use_cache:
|
||||||
return decode_tokens(tokenizer, seq)
|
return tokenizer.decode(seq)
|
||||||
|
|
||||||
return _cached_decode(tokenizer, tuple(seq))
|
return _cached_decode(tokenizer, tuple(seq))
|
||||||
|
|
||||||
@@ -126,7 +123,7 @@ def _seq2tokens(
|
|||||||
raise ValueError("You cannot encode text when `skip_tokenizer_init=True`")
|
raise ValueError("You cannot encode text when `skip_tokenizer_init=True`")
|
||||||
|
|
||||||
if not use_cache:
|
if not use_cache:
|
||||||
return encode_tokens(tokenizer, seq, add_special_tokens=False)
|
return tokenizer.encode(seq, add_special_tokens=False)
|
||||||
|
|
||||||
return _cached_encode(tokenizer, seq, add_special_tokens=False)
|
return _cached_encode(tokenizer, seq, add_special_tokens=False)
|
||||||
|
|
||||||
@@ -2198,8 +2195,8 @@ class EncDecMultiModalProcessor(BaseMultiModalProcessor[_I]):
|
|||||||
tokenizer = self.info.get_tokenizer()
|
tokenizer = self.info.get_tokenizer()
|
||||||
decoder_prompt_raw = self.create_decoder_prompt(prompt, mm_data)
|
decoder_prompt_raw = self.create_decoder_prompt(prompt, mm_data)
|
||||||
if isinstance(decoder_prompt_raw, str):
|
if isinstance(decoder_prompt_raw, str):
|
||||||
decoder_prompt_ids = encode_tokens(
|
decoder_prompt_ids = tokenizer.encode(
|
||||||
tokenizer, decoder_prompt_raw, add_special_tokens=False
|
decoder_prompt_raw, add_special_tokens=False
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
decoder_prompt_ids = decoder_prompt_raw
|
decoder_prompt_ids = decoder_prompt_raw
|
||||||
|
|||||||
@@ -4,6 +4,8 @@
|
|||||||
import warnings
|
import warnings
|
||||||
from typing import Any
|
from typing import Any
|
||||||
|
|
||||||
|
from typing_extensions import deprecated
|
||||||
|
|
||||||
from vllm.logger import init_logger
|
from vllm.logger import init_logger
|
||||||
from vllm.tokenizers import TokenizerLike
|
from vllm.tokenizers import TokenizerLike
|
||||||
|
|
||||||
@@ -73,6 +75,7 @@ def __getattr__(name: str):
|
|||||||
raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
|
raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
|
||||||
|
|
||||||
|
|
||||||
|
@deprecated("Will be removed in v0.13. Please use `tokenizer.decode()` instead.")
|
||||||
def decode_tokens(
|
def decode_tokens(
|
||||||
tokenizer: TokenizerLike,
|
tokenizer: TokenizerLike,
|
||||||
token_ids: list[int],
|
token_ids: list[int],
|
||||||
@@ -94,6 +97,7 @@ def decode_tokens(
|
|||||||
return tokenizer.decode(token_ids, **kw_args)
|
return tokenizer.decode(token_ids, **kw_args)
|
||||||
|
|
||||||
|
|
||||||
|
@deprecated("Will be removed in v0.13. Please use `tokenizer.encode()` instead.")
|
||||||
def encode_tokens(
|
def encode_tokens(
|
||||||
tokenizer: TokenizerLike,
|
tokenizer: TokenizerLike,
|
||||||
text: str,
|
text: str,
|
||||||
|
|||||||
Reference in New Issue
Block a user