[Core][Frontend] Support Passing Multimodal Processor Kwargs (#8657)

Signed-off-by: Alex-Brooks <Alex.Brooks@ibm.com>
This commit is contained in:
Alex Brooks
2024-09-23 01:44:48 -06:00
committed by GitHub
parent d23679eb99
commit 9b8c8ba119
16 changed files with 590 additions and 117 deletions

View File

@@ -6,7 +6,7 @@ import numpy as np
from vllm.config import ModelConfig
from vllm.inputs.registry import InputContext
from vllm.logger import init_logger
from vllm.transformers_utils.image_processor import get_video_processor
from vllm.transformers_utils.processor import get_video_processor
from vllm.transformers_utils.tokenizer import get_tokenizer
from vllm.utils import is_list_of
@@ -37,9 +37,14 @@ class VideoPlugin(ImagePlugin):
return "video"
def _get_hf_video_processor(self, model_config: ModelConfig):
    """Return the cached HF video processor for the configured model.

    Any processor keyword overrides set on
    ``model_config.mm_processor_kwargs`` are forwarded to the cached
    loader; when that attribute is ``None`` no overrides are passed.

    Args:
        model_config: Model configuration providing ``model``,
            ``trust_remote_code`` and ``mm_processor_kwargs``.

    Returns:
        Whatever ``cached_get_video_processor`` returns for these arguments.
    """
    mm_processor_kwargs = ({} if model_config.mm_processor_kwargs is None
                           else model_config.mm_processor_kwargs)
    # We don't explicitly check kwarg overrides to the HF class
    # since the automodel just takes kwargs, so we can't inspect it
    return cached_get_video_processor(
        model_config.model,
        trust_remote_code=model_config.trust_remote_code,
        **mm_processor_kwargs)
def _default_input_mapper(
self,