[Frontend] Consolidate tokenizer init code (#26276)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in:
Cyrus Leung
2025-10-06 17:34:52 +08:00
committed by GitHub
parent 77c95f72f7
commit 391612e78b
8 changed files with 46 additions and 70 deletions

View File

@@ -7,7 +7,6 @@ from vllm.config import ModelConfig
 from vllm.inputs import zip_enc_dec_prompts
 from vllm.inputs.parse import parse_raw_prompts
 from vllm.inputs.preprocess import InputPreprocessor
-from vllm.transformers_utils.tokenizer import init_tokenizer_from_configs
 pytestmark = pytest.mark.cpu_test
@@ -107,8 +106,7 @@ def test_zip_enc_dec_prompts(mm_processor_kwargs, expected_mm_kwargs):
 )
 def test_preprocessor_text_no_mm_inputs(model_id, prompt):
     model_config = ModelConfig(model=model_id)
-    tokenizer = init_tokenizer_from_configs(model_config)
-    input_preprocessor = InputPreprocessor(model_config, tokenizer)
+    input_preprocessor = InputPreprocessor(model_config)
     with pytest.raises(ValueError, match="does not support multimodal inputs"):
         input_preprocessor.preprocess(prompt)
@@ -129,8 +127,8 @@ def test_preprocessor_text_no_mm_inputs(model_id, prompt):
 )
 def test_preprocessor_always_mm_code_path(model_id, prompt):
     model_config = ModelConfig(model=model_id)
-    tokenizer = init_tokenizer_from_configs(model_config)
-    input_preprocessor = InputPreprocessor(model_config, tokenizer)
+    input_preprocessor = InputPreprocessor(model_config)
+    tokenizer = input_preprocessor.tokenizer
     # HF processor adds sep token
     sep_token_id = tokenizer.vocab[tokenizer.sep_token]