[Core] [Bugfix] [Multimodal] Fix multimodal profiling and generation for SFT/PTQed models (#20058)

Signed-off-by: Kyle Sayers <kylesayrs@gmail.com>
2025-06-30 13:26:49 -04:00
parent 551ef1631a
commit d8cf819a9a
41 changed files with 207 additions and 38 deletions
--- a/vllm/model_executor/models/qwen2_5_omni_thinker.py
+++ b/vllm/model_executor/models/qwen2_5_omni_thinker.py
@@ -244,6 +244,7 @@ class Qwen2_5OmniThinkerMultiModalProcessor(
        prompt: str,
        mm_data: Mapping[str, object],
        mm_kwargs: Mapping[str, object],
+        tok_kwargs: Mapping[str, object],
    ) -> BatchFeature:
        mm_data = dict(mm_data)
        audios = mm_data.pop("audios", [])
@@ -258,6 +259,7 @@ class Qwen2_5OmniThinkerMultiModalProcessor(
            prompt=prompt,
            mm_data=mm_data,
            mm_kwargs=mm_kwargs,
+            tok_kwargs=tok_kwargs,
        )

        input_features = hf_inputs.pop('input_features', None)
@@ -453,6 +455,7 @@ class Qwen2_5OmniThinkerMultiModalProcessor(
        prompt: Union[str, list[int]],
        mm_items: MultiModalDataItems,
        hf_processor_mm_kwargs: Mapping[str, object],
+        tokenization_kwargs: Mapping[str, object],
        *,
        enable_hf_prompt_update: bool,
    ) -> tuple[list[int], MultiModalKwargs, bool]:
@@ -465,6 +468,7 @@ class Qwen2_5OmniThinkerMultiModalProcessor(
                    prompt_text=prompt,
                    mm_items=mm_items,
                    hf_processor_mm_kwargs=hf_processor_mm_kwargs,
+                    tokenization_kwargs=tokenization_kwargs,
                )
            tokenizer = self.info.get_tokenizer()
            prompt_ids = encode_tokens(tokenizer, prompt)
@@ -474,6 +478,7 @@ class Qwen2_5OmniThinkerMultiModalProcessor(
        mm_kwargs = self._apply_hf_processor_mm_only(
            mm_items=mm_items,
            hf_processor_mm_kwargs=hf_processor_mm_kwargs,
+            tokenization_kwargs=tokenization_kwargs,
        )

        return prompt_ids, mm_kwargs, False
@@ -482,6 +487,7 @@ class Qwen2_5OmniThinkerMultiModalProcessor(
        self,
        mm_items: MultiModalDataItems,
        hf_processor_mm_kwargs: Mapping[str, object],
+        tokenization_kwargs: Mapping[str, object],
    ) -> MultiModalKwargs:
        """
        Qwen2.5-Omni reimplements this function to handle `use_audio_in_video`.
@@ -498,6 +504,7 @@ class Qwen2_5OmniThinkerMultiModalProcessor(
            prompt_text=self.dummy_inputs.get_dummy_text(mm_counts),
            mm_items=mm_items,
            hf_processor_mm_kwargs=hf_processor_mm_kwargs,
+            tokenization_kwargs=tokenization_kwargs,
        )

        return mm_kwargs