[Frontend] Support configurable mm placeholder strings & flexible video sampling policies via CLI flags. (#20105)
Signed-off-by: Chenheli Hua <huachenheli@outlook.com>
This commit is contained in:
@@ -346,6 +346,12 @@ class ModelConfig:
|
||||
limit_mm_per_prompt: dict[str, int] = field(default_factory=dict)
|
||||
"""Maximum number of data items per modality per prompt. Only applicable
|
||||
for multimodal models."""
|
||||
media_io_kwargs: dict[str, dict[str, Any]] = field(default_factory=dict)
|
||||
"""Additional args passed to process media inputs, keyed by modalities.
|
||||
For example, to set num_frames for video, set
|
||||
`--media-io-kwargs '{"video": {"num_frames": 40} }'` """
|
||||
mm_placeholder_str_override: dict[str, str] = field(default_factory=dict)
|
||||
"""Optionally override placeholder string for given modalities."""
|
||||
use_async_output_proc: bool = True
|
||||
"""Whether to use async output processor."""
|
||||
config_format: Union[str, ConfigFormat] = ConfigFormat.AUTO.value
|
||||
@@ -694,6 +700,8 @@ class ModelConfig:
|
||||
if self.registry.is_multimodal_model(self.architectures):
|
||||
return MultiModalConfig(
|
||||
limit_per_prompt=self.limit_mm_per_prompt,
|
||||
media_io_kwargs=self.media_io_kwargs,
|
||||
mm_placeholder_str_override=self.mm_placeholder_str_override,
|
||||
mm_processor_kwargs=self.mm_processor_kwargs,
|
||||
disable_mm_preprocessor_cache=self.
|
||||
disable_mm_preprocessor_cache)
|
||||
@@ -3063,6 +3071,14 @@ class MultiModalConfig:
|
||||
`{"images": 16, "videos": 2}`
|
||||
"""
|
||||
|
||||
media_io_kwargs: dict[str, dict[str, Any]] = field(default_factory=dict)
|
||||
"""Additional args passed to process media inputs, keyed by modalities.
|
||||
For example, to set num_frames for video, set
|
||||
`--media-io-kwargs '{"video": {"num_frames": 40} }'` """
|
||||
|
||||
mm_placeholder_str_override: dict[str, str] = field(default_factory=dict)
|
||||
"""Optionally override placeholder string for given modalities."""
|
||||
|
||||
mm_processor_kwargs: Optional[dict[str, object]] = None
|
||||
"""
|
||||
Overrides for the multi-modal processor obtained from
|
||||
|
||||
Reference in New Issue
Block a user