[Core][Frontend] Add Support for Inference Time mm_processor_kwargs (#9131)

Signed-off-by: Alex-Brooks <Alex.Brooks@ibm.com>
This commit is contained in:
Alex Brooks
2024-10-08 08:12:56 -06:00
committed by GitHub
parent 8c746226c9
commit a3691b6b5e
21 changed files with 440 additions and 118 deletions

View File

@@ -1,5 +1,5 @@
from functools import lru_cache
from typing import List, Union
from typing import Any, Dict, List, Optional, Union
import numpy as np
@@ -36,11 +36,13 @@ class VideoPlugin(ImagePlugin):
def get_data_key(self) -> str:
return "video"
def _get_hf_video_processor(self, model_config: ModelConfig):
mm_processor_kwargs = ({} if model_config.mm_processor_kwargs is None
else model_config.mm_processor_kwargs)
# We don't explicitly check kwarg overrides to the HF class
# since the automodel just takes kwargs, so we can't inspect it
def _get_hf_video_processor(
self,
model_config: ModelConfig,
mm_processor_kwargs: Optional[Dict[str, Any]] = None,
):
if mm_processor_kwargs is None:
mm_processor_kwargs = {}
return cached_get_video_processor(
model_config.model,
trust_remote_code=model_config.trust_remote_code,
@@ -50,16 +52,24 @@ class VideoPlugin(ImagePlugin):
self,
ctx: InputContext,
data: MultiModalData[object],
**mm_processor_kwargs,
) -> MultiModalInputs:
model_config = ctx.model_config
# single video input as np.ndarray
if isinstance(data, np.ndarray):
video_processor = self._get_hf_video_processor(model_config)
video_processor = self._get_hf_video_processor(
model_config,
mm_processor_kwargs,
)
if video_processor is None:
raise RuntimeError("No HuggingFace processor is available "
"to process the image object")
try:
# NOTE: Similar to image; it may be a good idea to filter and
# pass mm_processor_kwargs here too, but for now we don't to
# avoid extra complexity if the initializer and preprocess
# signatures of the processor don't align
batch_data = video_processor(data, return_tensors="pt").data
except Exception:
logger.error("Failed to process image (%s)", data)