[Bugfix] Always apply MM processor even when no MM items are passed (#26240)
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
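With this change, the input preprocessor routes prompts for multimodal models through the multimodal processor even when no multimodal items are attached, since the HF processor may still alter the token sequence (e.g. Chameleon's processor adds a sep token; see the new test below). A minimal standalone sketch of the intended dispatch, where run_mm_processor and run_text_tokenizer are hypothetical stand-ins, not vLLM's actual API:

from typing import Any, Optional

def run_mm_processor(prompt: dict[str, Any], mm_data: dict[str, Any]) -> dict[str, Any]:
    # Hypothetical stand-in: the HF processor may add special tokens
    # even when mm_data is empty.
    return {"prompt_token_ids": [1, 2], "mm_data": mm_data}

def run_text_tokenizer(prompt: dict[str, Any]) -> dict[str, Any]:
    # Hypothetical stand-in for the text-only tokenization path.
    return {"prompt_token_ids": [1, 2]}

def preprocess(prompt: dict[str, Any], model_is_multimodal: bool) -> dict[str, Any]:
    mm_data: Optional[dict[str, Any]] = prompt.get("multi_modal_data")
    if model_is_multimodal:
        # After this fix: the MM path always runs for MM models,
        # even when the prompt carries no multimodal items.
        return run_mm_processor(prompt, mm_data or {})
    if mm_data:
        # Text-only models reject multimodal inputs.
        raise ValueError("Model does not support multimodal inputs")
    return run_text_tokenizer(prompt)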
@@ -46,7 +46,6 @@ from vllm.connections import global_http_connection
 from vllm.distributed import (cleanup_dist_env_and_memory,
                               init_distributed_environment,
                               initialize_model_parallel)
-from vllm.inputs import TextPrompt
 from vllm.logger import init_logger
 from vllm.logprobs import Logprob
 from vllm.multimodal.utils import fetch_image
@@ -760,17 +759,24 @@ class VllmRunner:
         images: Optional[PromptImageInput] = None,
         videos: Optional[PromptVideoInput] = None,
         audios: Optional[PromptAudioInput] = None,
-    ) -> list[TextPrompt]:
+    ) -> list[dict[str, Any]]:
         if any(x is not None and len(x) != len(prompts)
                for x in [images, videos, audios]):
             raise ValueError(
                 "All non-None multimodal inputs must have the same length as "
                 "prompts")
 
-        inputs = []
+        inputs = list[dict[str, Any]]()
         for i, prompt in enumerate(prompts):
-            multi_modal_data = {}
+            prompt_dict = dict[str, Any]()
+            if isinstance(prompt, str):
+                prompt_dict["prompt"] = prompt
+            elif isinstance(prompt, list):
+                prompt_dict["prompt_token_ids"] = prompt
+            else:
+                prompt_dict["prompt_embeds"] = prompt
+
+            multi_modal_data = dict[str, Any]()
             if images is not None and (image := images[i]) is not None:
                 multi_modal_data["image"] = image
             if videos is not None and (video := videos[i]) is not None:
@@ -778,17 +784,10 @@ class VllmRunner:
             if audios is not None and (audio := audios[i]) is not None:
                 multi_modal_data["audio"] = audio
 
-            text_prompt_kwargs: dict[str, Any] = {
-                "multi_modal_data": multi_modal_data or None
-            }
-            if isinstance(prompt, str):
-                text_prompt_kwargs["prompt"] = prompt
-            elif isinstance(prompt, list):
-                text_prompt_kwargs["prompt_token_ids"] = prompt
-            else:
-                text_prompt_kwargs["prompt_embeds"] = prompt
+            if multi_modal_data:
+                prompt_dict["multi_modal_data"] = multi_modal_data
 
-            inputs.append(TextPrompt(**text_prompt_kwargs))
+            inputs.append(prompt_dict)
 
         return inputs
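For illustration, a standalone restatement of the reworked helper above, restricted to string/token prompts and images for brevity; the name get_inputs_sketch is invented for this sketch and is not part of vLLM:

from typing import Any, Optional

def get_inputs_sketch(
    prompts: list,
    images: Optional[list] = None,
) -> list[dict[str, Any]]:
    # Build plain prompt dicts instead of TextPrompt objects.
    inputs: list[dict[str, Any]] = []
    for i, prompt in enumerate(prompts):
        prompt_dict: dict[str, Any] = (
            {"prompt": prompt} if isinstance(prompt, str)
            else {"prompt_token_ids": prompt}
        )
        multi_modal_data: dict[str, Any] = {}
        if images is not None and images[i] is not None:
            multi_modal_data["image"] = images[i]
        # The helper now omits "multi_modal_data" entirely when empty,
        # instead of passing multi_modal_data=None as before.
        if multi_modal_data:
            prompt_dict["multi_modal_data"] = multi_modal_data
        inputs.append(prompt_dict)
    return inputs

# A prompt without MM items no longer carries a "multi_modal_data" key:
assert get_inputs_sketch(["hi"], images=[None]) == [{"prompt": "hi"}]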
@@ -3,8 +3,11 @@
 
 import pytest
 
+from vllm.config import ModelConfig
 from vllm.inputs import zip_enc_dec_prompts
 from vllm.inputs.parse import parse_raw_prompts
+from vllm.inputs.preprocess import InputPreprocessor
+from vllm.transformers_utils.tokenizer import init_tokenizer_from_configs
 
 pytestmark = pytest.mark.cpu_test
 
@@ -80,3 +83,50 @@ def test_zip_enc_dec_prompts(mm_processor_kwargs, expected_mm_kwargs):
         assert zipped['encoder_prompt'] == enc
         assert zipped['decoder_prompt'] == dec
         assert zipped['mm_processor_kwargs'] == exp_kwargs
+
+
+@pytest.mark.parametrize("model_id", [
+    "facebook/opt-125m",
+])
+@pytest.mark.parametrize("prompt", [
+    {
+        "prompt": "",
+        "multi_modal_data": {
+            "dummy": []
+        },
+    },
+    {
+        "prompt_token_ids": [],
+        "multi_modal_data": {
+            "dummy": []
+        },
+    },
+])
+def test_preprocessor_text_no_mm_inputs(model_id, prompt):
+    model_config = ModelConfig(model=model_id)
+    tokenizer = init_tokenizer_from_configs(model_config)
+    input_preprocessor = InputPreprocessor(model_config, tokenizer)
+
+    with pytest.raises(ValueError, match="does not support multimodal inputs"):
+        input_preprocessor.preprocess(prompt)
+
+
+@pytest.mark.parametrize("model_id", [
+    "facebook/chameleon-7b",
+])
+@pytest.mark.parametrize("prompt", [
+    "",
+    {
+        "prompt_token_ids": []
+    },
+])
+def test_preprocessor_always_mm_code_path(model_id, prompt):
+    model_config = ModelConfig(model=model_id)
+    tokenizer = init_tokenizer_from_configs(model_config)
+    input_preprocessor = InputPreprocessor(model_config, tokenizer)
+
+    # HF processor adds sep token
+    sep_token_id = tokenizer.vocab[tokenizer.sep_token]
+
+    processed_inputs = input_preprocessor.preprocess(prompt)
+    assert sep_token_id in processed_inputs["prompt_token_ids"]
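For context, the sep-token assertion above relies on Chameleon's HF processor appending the tokenizer's sep token to each text prompt (as the test comment notes). A hedged way to observe this outside vLLM, assuming only that AutoProcessor resolves facebook/chameleon-7b and that the processor behaves as that comment states:

from transformers import AutoProcessor

# Assumption: ChameleonProcessor appends the sep token to processed text,
# which is what the vLLM test above depends on.
processor = AutoProcessor.from_pretrained("facebook/chameleon-7b")
tokenizer = processor.tokenizer
sep_token_id = tokenizer.vocab[tokenizer.sep_token]

input_ids = processor(text="a")["input_ids"][0]
assert sep_token_id in input_ids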