[Bugfix] Always apply MM processor even when no MM items are passed (#26240)
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in:
@@ -3,8 +3,11 @@
|
||||
|
||||
import pytest
|
||||
|
||||
from vllm.config import ModelConfig
|
||||
from vllm.inputs import zip_enc_dec_prompts
|
||||
from vllm.inputs.parse import parse_raw_prompts
|
||||
from vllm.inputs.preprocess import InputPreprocessor
|
||||
from vllm.transformers_utils.tokenizer import init_tokenizer_from_configs
|
||||
|
||||
pytestmark = pytest.mark.cpu_test
|
||||
|
||||
@@ -80,3 +83,50 @@ def test_zip_enc_dec_prompts(mm_processor_kwargs, expected_mm_kwargs):
|
||||
assert zipped['encoder_prompt'] == enc
|
||||
assert zipped['decoder_prompt'] == dec
|
||||
assert zipped['mm_processor_kwargs'] == exp_kwargs
|
||||
|
||||
|
||||
@pytest.mark.parametrize("model_id", ["facebook/opt-125m"])
@pytest.mark.parametrize(
    "prompt",
    [
        {"prompt": "", "multi_modal_data": {"dummy": []}},
        {"prompt_token_ids": [], "multi_modal_data": {"dummy": []}},
    ],
)
def test_preprocessor_text_no_mm_inputs(model_id, prompt):
    """Expect a ValueError when a prompt carries a ``multi_modal_data`` entry.

    Both parametrized prompts (one text prompt, one token-id prompt) include
    a ``multi_modal_data`` dict whose only key maps to an empty list, so no
    actual multimodal items are supplied. The preprocessor built for
    ``model_id`` must still reject them with an error whose message matches
    "does not support multimodal inputs".
    """
    config = ModelConfig(model=model_id)
    preprocessor = InputPreprocessor(
        config,
        init_tokenizer_from_configs(config),
    )

    # The call itself must raise; nothing is inspected afterwards.
    with pytest.raises(ValueError, match="does not support multimodal inputs"):
        preprocessor.preprocess(prompt)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("model_id", ["facebook/chameleon-7b"])
@pytest.mark.parametrize("prompt", ["", {"prompt_token_ids": []}])
def test_preprocessor_always_mm_code_path(model_id, prompt):
    """Check that preprocessing an empty prompt still yields the sep token.

    The prompts (an empty string and an empty token-id list) contain no
    multimodal data. The test asserts that the tokenizer's separator token
    id nevertheless appears in the resulting ``prompt_token_ids``.
    """
    config = ModelConfig(model=model_id)
    tok = init_tokenizer_from_configs(config)
    preprocessor = InputPreprocessor(config, tok)

    # HF processor adds sep token
    expected_sep_id = tok.vocab[tok.sep_token]

    result = preprocessor.preprocess(prompt)
    assert expected_sep_id in result["prompt_token_ids"]
|
||||
|
||||
Reference in New Issue
Block a user