[Core][Frontend] Add Support for Inference Time mm_processor_kwargs (#9131)
Signed-off-by: Alex-Brooks <Alex.Brooks@ibm.com>
This commit is contained in:
@@ -1,5 +1,5 @@
|
||||
from functools import lru_cache
|
||||
from typing import List, Union
|
||||
from typing import Any, Dict, List, Optional, Union
|
||||
|
||||
import numpy as np
|
||||
|
||||
@@ -36,11 +36,13 @@ class VideoPlugin(ImagePlugin):
|
||||
def get_data_key(self) -> str:
|
||||
return "video"
|
||||
|
||||
def _get_hf_video_processor(self, model_config: ModelConfig):
|
||||
mm_processor_kwargs = ({} if model_config.mm_processor_kwargs is None
|
||||
else model_config.mm_processor_kwargs)
|
||||
# We don't explicitly check kwarg overrides to the HF class
|
||||
# since the automodel just takes kwargs, so we can't inspect it
|
||||
def _get_hf_video_processor(
|
||||
self,
|
||||
model_config: ModelConfig,
|
||||
mm_processor_kwargs: Optional[Dict[str, Any]] = None,
|
||||
):
|
||||
if mm_processor_kwargs is None:
|
||||
mm_processor_kwargs = {}
|
||||
return cached_get_video_processor(
|
||||
model_config.model,
|
||||
trust_remote_code=model_config.trust_remote_code,
|
||||
@@ -50,16 +52,24 @@ class VideoPlugin(ImagePlugin):
|
||||
self,
|
||||
ctx: InputContext,
|
||||
data: MultiModalData[object],
|
||||
**mm_processor_kwargs,
|
||||
) -> MultiModalInputs:
|
||||
model_config = ctx.model_config
|
||||
|
||||
# single video input as np.ndarray
|
||||
if isinstance(data, np.ndarray):
|
||||
video_processor = self._get_hf_video_processor(model_config)
|
||||
video_processor = self._get_hf_video_processor(
|
||||
model_config,
|
||||
mm_processor_kwargs,
|
||||
)
|
||||
if video_processor is None:
|
||||
raise RuntimeError("No HuggingFace processor is available "
|
||||
"to process the image object")
|
||||
try:
|
||||
# NOTE: Similar to image; it may be a good idea to filter and
|
||||
# pass mm_processor_kwargs here too, but for now we don't to
|
||||
# avoid extra complexity if the initializer and preprocess
|
||||
# signatures of the processor don't align
|
||||
batch_data = video_processor(data, return_tensors="pt").data
|
||||
except Exception:
|
||||
logger.error("Failed to process image (%s)", data)
|
||||
|
||||
Reference in New Issue
Block a user