[Frontend] Support configurable mm placeholder strings & flexible video sampling policies via CLI flags. (#20105)

Signed-off-by: Chenheli Hua <huachenheli@outlook.com>
This commit is contained in:
Chenheli Hua
2025-07-01 23:34:03 -07:00
committed by GitHub
parent 7da296be04
commit 2e7cbf2d7d
12 changed files with 199 additions and 29 deletions

View File

@@ -507,6 +507,9 @@ class BaseMultiModalItemTracker(ABC, Generic[_T]):
def _placeholder_str(self, modality: ModalityStr,
current_count: int) -> Optional[str]:
if modality in self._model_config.mm_placeholder_str_override:
return self._model_config.mm_placeholder_str_override[modality]
# TODO: Let user specify how to insert image tokens into prompt
# (similar to chat template)
hf_config = self._model_config.hf_config
@@ -725,6 +728,7 @@ class MultiModalContentParser(BaseMultiModalContentParser):
self._tracker = tracker
self._connector = MediaConnector(
media_io_kwargs=self._tracker._model_config.media_io_kwargs,
allowed_local_media_path=tracker.allowed_local_media_path,
)
@@ -763,7 +767,7 @@ class MultiModalContentParser(BaseMultiModalContentParser):
return self.parse_audio(audio_url)
def parse_video(self, video_url: str) -> None:
video = self._connector.fetch_video(video_url)
video = self._connector.fetch_video(video_url=video_url)
placeholder = self._tracker.add("video", video)
self._add_placeholder(placeholder)
@@ -776,7 +780,8 @@ class AsyncMultiModalContentParser(BaseMultiModalContentParser):
self._tracker = tracker
self._connector = MediaConnector(
allowed_local_media_path=tracker.allowed_local_media_path,
media_io_kwargs=self._tracker._model_config.media_io_kwargs,
allowed_local_media_path=tracker.allowed_local_media_path
)
def parse_image(self, image_url: str) -> None:
@@ -818,7 +823,7 @@ class AsyncMultiModalContentParser(BaseMultiModalContentParser):
return self.parse_audio(audio_url)
def parse_video(self, video_url: str) -> None:
video = self._connector.fetch_video_async(video_url)
video = self._connector.fetch_video_async(video_url=video_url)
placeholder = self._tracker.add("video", video)
self._add_placeholder(placeholder)