[Chore] Use tokenizer.encode and tokenizer.decode directly (#29851)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
Authored by: Cyrus Leung
Date: 2025-12-02 20:30:40 +08:00
Committed by: GitHub
parent 951445a52d
commit 68ffbca7e4
9 changed files with 36 additions and 40 deletions

View File

@@ -22,8 +22,11 @@ from vllm.multimodal import MULTIMODAL_REGISTRY, MultiModalDataDict
from vllm.multimodal.cache import MultiModalProcessorOnlyCache
from vllm.multimodal.inputs import MultiModalInputs
from vllm.multimodal.processing import BaseMultiModalProcessor, InputProcessingContext
-from vllm.tokenizers import MistralTokenizer, cached_tokenizer_from_config
-from vllm.transformers_utils.tokenizer import encode_tokens
+from vllm.tokenizers import (
+    MistralTokenizer,
+    TokenizerLike,
+    cached_tokenizer_from_config,
+)
from ....multimodal.utils import random_audio, random_image, random_video
from ...registry import (
@@ -151,7 +154,7 @@ def get_text_token_prompts(
mm_data: MultiModalDataDict,
):
dummy_inputs = processor.dummy_inputs
-    tokenizer = processor.info.get_tokenizer()
+    tokenizer: TokenizerLike = processor.info.get_tokenizer()
model_config = processor.info.ctx.model_config
model_type = model_config.hf_config.model_type
@@ -188,10 +191,9 @@ def get_text_token_prompts(
assert isinstance(inputs.prompt, str)
text_prompt = inputs.prompt
-    token_prompt = encode_tokens(
-        tokenizer,
+    token_prompt = tokenizer.encode(
         text_prompt,
-        add_special_tokens=_ADD_SPECIAL_TOKENS_OVERRIDES.get(model_type),
+        add_special_tokens=_ADD_SPECIAL_TOKENS_OVERRIDES.get(model_type, True),
     )
return text_prompt, token_prompt