[Bugfix] Always apply MM processor even when no MM items are passed (#26240)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in:
Cyrus Leung
2025-10-05 18:10:20 +08:00
committed by GitHub
parent 432e1cbc23
commit b7e8e4e6be
6 changed files with 102 additions and 30 deletions

View File

@@ -3,8 +3,11 @@
import pytest
from vllm.config import ModelConfig
from vllm.inputs import zip_enc_dec_prompts
from vllm.inputs.parse import parse_raw_prompts
from vllm.inputs.preprocess import InputPreprocessor
from vllm.transformers_utils.tokenizer import init_tokenizer_from_configs
# Module-level pytest mark: pytest applies `pytestmark` to every test
# collected from this module. NOTE(review): `cpu_test` is a project-defined
# marker; its selection semantics are not visible here -- confirm in the
# project's pytest configuration.
pytestmark = pytest.mark.cpu_test
@@ -80,3 +83,50 @@ def test_zip_enc_dec_prompts(mm_processor_kwargs, expected_mm_kwargs):
assert zipped['encoder_prompt'] == enc
assert zipped['decoder_prompt'] == dec
assert zipped['mm_processor_kwargs'] == exp_kwargs
@pytest.mark.parametrize(
    "model_id",
    [
        "facebook/opt-125m",
    ],
)
@pytest.mark.parametrize(
    "prompt",
    [
        {
            "prompt": "",
            "multi_modal_data": {"dummy": []},
        },
        {
            "prompt_token_ids": [],
            "multi_modal_data": {"dummy": []},
        },
    ],
)
def test_preprocessor_text_no_mm_inputs(model_id, prompt):
    """Check that a prompt carrying `multi_modal_data` is rejected.

    Both parametrized prompts (one text-based, one token-id-based) include a
    `multi_modal_data` entry whose only modality maps to an empty list.
    Preprocessing either one with the parametrized model is expected to raise
    a `ValueError` whose message matches "does not support multimodal inputs".
    """
    config = ModelConfig(model=model_id)
    preprocessor = InputPreprocessor(
        config,
        init_tokenizer_from_configs(config),
    )

    expected_error = pytest.raises(
        ValueError,
        match="does not support multimodal inputs",
    )
    with expected_error:
        preprocessor.preprocess(prompt)
@pytest.mark.parametrize(
    "model_id",
    [
        "facebook/chameleon-7b",
    ],
)
@pytest.mark.parametrize(
    "prompt",
    [
        "",
        {"prompt_token_ids": []},
    ],
)
def test_preprocessor_always_mm_code_path(model_id, prompt):
    """Check that empty prompts still yield the tokenizer's sep token.

    An empty string prompt and an empty `prompt_token_ids` prompt are each
    preprocessed with the parametrized model; the resulting
    `prompt_token_ids` must contain the sep token id.
    """
    config = ModelConfig(model=model_id)
    tok = init_tokenizer_from_configs(config)
    preprocessor = InputPreprocessor(config, tok)

    # Per the original author's note, the HF processor is what adds the sep
    # token, so finding it in the output indicates the processor was applied.
    expected_sep_id = tok.vocab[tok.sep_token]

    result = preprocessor.preprocess(prompt)
    output_token_ids = result["prompt_token_ids"]
    assert expected_sep_id in output_token_ids