[Core] [Bugfix] [Multimodal] Fix multimodal profiling and generation for SFT/PTQed models (#20058)

Signed-off-by: Kyle Sayers <kylesayrs@gmail.com>
2025-06-30 13:26:49 -04:00
parent 551ef1631a
commit d8cf819a9a
41 changed files with 207 additions and 38 deletions
--- a/vllm/model_executor/models/qwen_vl.py
+++ b/vllm/model_executor/models/qwen_vl.py
@@ -580,6 +580,7 @@ class QwenVLMultiModalProcessor(BaseMultiModalProcessor[QwenVLProcessingInfo]):
        prompt: str,
        mm_data: Mapping[str, object],
        mm_kwargs: Mapping[str, object],
+        tok_kwargs: Mapping[str, object],
    ) -> BatchFeature:
        # Drops anything between <img>/</img> tags; encoding with the tokenizer
        # will automatically add the image pads for the context.
@@ -600,6 +601,7 @@ class QwenVLMultiModalProcessor(BaseMultiModalProcessor[QwenVLProcessingInfo]):
            prompt=prompt,
            mm_data=mm_data,
            mm_kwargs=mm_kwargs,
+            tok_kwargs=tok_kwargs,
        )

    def _hf_processor_applies_updates(
@@ -607,6 +609,7 @@ class QwenVLMultiModalProcessor(BaseMultiModalProcessor[QwenVLProcessingInfo]):
        prompt_text: str,
        mm_items: MultiModalDataItems,
        hf_processor_mm_kwargs: Mapping[str, object],
+        tokenization_kwargs: Mapping[str, object],
    ) -> bool:
        return False