[Misc] Add fully interleaved support for multimodal 'string' content format (#14047)

Signed-off-by: drobyshev.anton <drobyshev.anton@wb.ru>
Co-authored-by: drobyshev.anton <drobyshev.anton@wb.ru>
This commit is contained in:
Anton
2025-07-07 22:43:08 +03:00
committed by GitHub
parent 22dd9c2730
commit e601efcb10
4 changed files with 478 additions and 43 deletions

View File

@@ -370,6 +370,7 @@ class EngineArgs:
get_field(TokenizerPoolConfig, "extra_config")
# Default is looked up from MultiModalConfig's "limit_per_prompt" field via
# get_field (presumably returns that field's declared default -- confirm).
limit_mm_per_prompt: dict[str, int] = \
get_field(MultiModalConfig, "limit_per_prompt")
# Default is read directly from the MultiModalConfig class attribute of the
# same name rather than through get_field.
interleave_mm_strings: bool = MultiModalConfig.interleave_mm_strings
# Two-level mapping (str -> str -> Any); default is looked up from
# MultiModalConfig's "media_io_kwargs" field via get_field.
media_io_kwargs: dict[str, dict[str,
Any]] = get_field(MultiModalConfig,
"media_io_kwargs")
@@ -763,6 +764,9 @@ class EngineArgs:
# Each flag's add_argument options are taken from the matching entry of
# multimodal_kwargs, keyed by the snake_case form of the flag name.
multimodal_group.add_argument(
"--disable-mm-preprocessor-cache",
**multimodal_kwargs["disable_mm_preprocessor_cache"])
# Registers the --interleave-mm-strings flag on the same argument group.
multimodal_group.add_argument(
"--interleave-mm-strings",
**multimodal_kwargs["interleave_mm_strings"])
# LoRA related configs
# NOTE(review): get_kwargs(LoRAConfig) presumably builds the per-field
# add_argument options for LoRAConfig, like multimodal_kwargs above -- confirm.
lora_kwargs = get_kwargs(LoRAConfig)
@@ -981,6 +985,7 @@ class EngineArgs:
enable_prompt_embeds=self.enable_prompt_embeds,
served_model_name=self.served_model_name,
limit_mm_per_prompt=self.limit_mm_per_prompt,
interleave_mm_strings=self.interleave_mm_strings,
media_io_kwargs=self.media_io_kwargs,
use_async_output_proc=not self.disable_async_output_proc,
config_format=self.config_format,