[Core] Move multimodal placeholder from chat utils to model definition (#20355)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
2025-07-03 16:18:30 +08:00
parent cb97f2bfc5
commit b024a42e93
54 changed files with 396 additions and 155 deletions
--- a/vllm/config.py
+++ b/vllm/config.py
@@ -350,8 +350,6 @@ class ModelConfig:
    """Additional args passed to process media inputs, keyed by modalities. 
    For example, to set num_frames for video, set 
    `--media-io-kwargs '{"video": {"num_frames": 40} }'` """
-    mm_placeholder_str_override: dict[str, str] = field(default_factory=dict)
-    """Optionally override placeholder string for given modalities."""
    use_async_output_proc: bool = True
    """Whether to use async output processor."""
    config_format: Union[str, ConfigFormat] = ConfigFormat.AUTO.value
@@ -661,7 +659,7 @@ class ModelConfig:
        return self._architecture

    @property
-    def model_info(self) -> dict[str, Any]:
+    def model_info(self):
        return self._model_info

    def maybe_pull_model_tokenizer_for_s3(self, model: str,
@@ -701,7 +699,6 @@ class ModelConfig:
            return MultiModalConfig(
                limit_per_prompt=self.limit_mm_per_prompt,
                media_io_kwargs=self.media_io_kwargs,
-                mm_placeholder_str_override=self.mm_placeholder_str_override,
                mm_processor_kwargs=self.mm_processor_kwargs,
                disable_mm_preprocessor_cache=self.
                disable_mm_preprocessor_cache)
@@ -3096,9 +3093,6 @@ class MultiModalConfig:
    For example, to set num_frames for video, set 
    `--media-io-kwargs '{"video": {"num_frames": 40} }'` """

-    mm_placeholder_str_override: dict[str, str] = field(default_factory=dict)
-    """Optionally override placeholder string for given modalities."""
-
    mm_processor_kwargs: Optional[dict[str, object]] = None
    """
    Overrides for the multi-modal processor obtained from