[Chore] Use tokenizer.encode and tokenizer.decode directly (#29851)
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in:
@@ -22,8 +22,11 @@ from vllm.multimodal import MULTIMODAL_REGISTRY, MultiModalDataDict
|
||||
from vllm.multimodal.cache import MultiModalProcessorOnlyCache
|
||||
from vllm.multimodal.inputs import MultiModalInputs
|
||||
from vllm.multimodal.processing import BaseMultiModalProcessor, InputProcessingContext
|
||||
-from vllm.tokenizers import MistralTokenizer, cached_tokenizer_from_config
-from vllm.transformers_utils.tokenizer import encode_tokens
+from vllm.tokenizers import (
+MistralTokenizer,
+TokenizerLike,
+cached_tokenizer_from_config,
+)
|
||||
|
||||
from ....multimodal.utils import random_audio, random_image, random_video
|
||||
from ...registry import (
|
||||
@@ -151,7 +154,7 @@ def get_text_token_prompts(
|
||||
mm_data: MultiModalDataDict,
|
||||
):
|
||||
dummy_inputs = processor.dummy_inputs
|
||||
-tokenizer = processor.info.get_tokenizer()
+tokenizer: TokenizerLike = processor.info.get_tokenizer()
|
||||
model_config = processor.info.ctx.model_config
|
||||
|
||||
model_type = model_config.hf_config.model_type
|
||||
@@ -188,10 +191,9 @@ def get_text_token_prompts(
|
||||
assert isinstance(inputs.prompt, str)
|
||||
|
||||
text_prompt = inputs.prompt
|
||||
-token_prompt = encode_tokens(
-tokenizer,
+token_prompt = tokenizer.encode(
 text_prompt,
-add_special_tokens=_ADD_SPECIAL_TOKENS_OVERRIDES.get(model_type),
+add_special_tokens=_ADD_SPECIAL_TOKENS_OVERRIDES.get(model_type, True),
|
||||
)
|
||||
|
||||
return text_prompt, token_prompt
|
||||
|
||||
@@ -5,7 +5,6 @@
|
||||
import pytest
|
||||
|
||||
from vllm.multimodal import MULTIMODAL_REGISTRY
|
||||
-from vllm.transformers_utils.tokenizer import encode_tokens
|
||||
|
||||
from ....conftest import ImageTestAssets
|
||||
from ...utils import build_model_context
|
||||
@@ -48,7 +47,7 @@ def test_processor_override(
|
||||
]
|
||||
}
|
||||
if tokenized_prompt:
|
||||
-prompt = encode_tokens(tokenizer, prompt)
+prompt = tokenizer.encode(prompt)
|
||||
|
||||
processed_inputs = processor.apply(prompt, mm_data, mm_processor_kwargs)
|
||||
mm_data = processed_inputs["mm_kwargs"].get_data()
|
||||
|
||||
Reference in New Issue
Block a user