[Frontend] Support configurable mm placeholder strings & flexible video sampling policies via CLI flags. (#20105)

Signed-off-by: Chenheli Hua <huachenheli@outlook.com>
This commit is contained in:
Chenheli Hua
2025-07-01 23:34:03 -07:00
committed by GitHub
parent 7da296be04
commit 2e7cbf2d7d
12 changed files with 199 additions and 29 deletions

View File

@@ -346,6 +346,12 @@ class ModelConfig:
limit_mm_per_prompt: dict[str, int] = field(default_factory=dict)
"""Maximum number of data items per modality per prompt. Only applicable
for multimodal models."""
media_io_kwargs: dict[str, dict[str, Any]] = field(default_factory=dict)
"""Additional args passed to process media inputs, keyed by modalities.
For example, to set num_frames for video, set
`--media-io-kwargs '{"video": {"num_frames": 40} }'` """
mm_placeholder_str_override: dict[str, str] = field(default_factory=dict)
"""Optionally override placeholder string for given modalities."""
use_async_output_proc: bool = True
"""Whether to use async output processor."""
config_format: Union[str, ConfigFormat] = ConfigFormat.AUTO.value
@@ -694,6 +700,8 @@ class ModelConfig:
if self.registry.is_multimodal_model(self.architectures):
return MultiModalConfig(
limit_per_prompt=self.limit_mm_per_prompt,
media_io_kwargs=self.media_io_kwargs,
mm_placeholder_str_override=self.mm_placeholder_str_override,
mm_processor_kwargs=self.mm_processor_kwargs,
disable_mm_preprocessor_cache=self.
disable_mm_preprocessor_cache)
@@ -3063,6 +3071,14 @@ class MultiModalConfig:
`{"images": 16, "videos": 2}`
"""
media_io_kwargs: dict[str, dict[str, Any]] = field(default_factory=dict)
"""Additional args passed to process media inputs, keyed by modalities.
For example, to set num_frames for video, set
`--media-io-kwargs '{"video": {"num_frames": 40} }'` """
mm_placeholder_str_override: dict[str, str] = field(default_factory=dict)
"""Optionally override placeholder string for given modalities."""
mm_processor_kwargs: Optional[dict[str, object]] = None
"""
Overrides for the multi-modal processor obtained from