[Bugfix] Always apply MM processor even when no MM items are passed (#26240)
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in:
@@ -46,7 +46,6 @@ from vllm.connections import global_http_connection
|
||||
from vllm.distributed import (cleanup_dist_env_and_memory,
|
||||
init_distributed_environment,
|
||||
initialize_model_parallel)
|
||||
from vllm.inputs import TextPrompt
|
||||
from vllm.logger import init_logger
|
||||
from vllm.logprobs import Logprob
|
||||
from vllm.multimodal.utils import fetch_image
|
||||
@@ -760,17 +759,24 @@ class VllmRunner:
|
||||
images: Optional[PromptImageInput] = None,
|
||||
videos: Optional[PromptVideoInput] = None,
|
||||
audios: Optional[PromptAudioInput] = None,
|
||||
) -> list[TextPrompt]:
|
||||
|
||||
) -> list[dict[str, Any]]:
|
||||
if any(x is not None and len(x) != len(prompts)
|
||||
for x in [images, videos, audios]):
|
||||
raise ValueError(
|
||||
"All non-None multimodal inputs must have the same length as "
|
||||
"prompts")
|
||||
|
||||
inputs = []
|
||||
inputs = list[dict[str, Any]]()
|
||||
for i, prompt in enumerate(prompts):
|
||||
multi_modal_data = {}
|
||||
prompt_dict = dict[str, Any]()
|
||||
if isinstance(prompt, str):
|
||||
prompt_dict["prompt"] = prompt
|
||||
elif isinstance(prompt, list):
|
||||
prompt_dict["prompt_token_ids"] = prompt
|
||||
else:
|
||||
prompt_dict["prompt_embeds"] = prompt
|
||||
|
||||
multi_modal_data = dict[str, Any]()
|
||||
if images is not None and (image := images[i]) is not None:
|
||||
multi_modal_data["image"] = image
|
||||
if videos is not None and (video := videos[i]) is not None:
|
||||
@@ -778,17 +784,10 @@ class VllmRunner:
|
||||
if audios is not None and (audio := audios[i]) is not None:
|
||||
multi_modal_data["audio"] = audio
|
||||
|
||||
text_prompt_kwargs: dict[str, Any] = {
|
||||
"multi_modal_data": multi_modal_data or None
|
||||
}
|
||||
if isinstance(prompt, str):
|
||||
text_prompt_kwargs["prompt"] = prompt
|
||||
elif isinstance(prompt, list):
|
||||
text_prompt_kwargs["prompt_token_ids"] = prompt
|
||||
else:
|
||||
text_prompt_kwargs["prompt_embeds"] = prompt
|
||||
if multi_modal_data:
|
||||
prompt_dict["multi_modal_data"] = multi_modal_data
|
||||
|
||||
inputs.append(TextPrompt(**text_prompt_kwargs))
|
||||
inputs.append(prompt_dict)
|
||||
|
||||
return inputs
|
||||
|
||||
|
||||
Reference in New Issue
Block a user