[VLM] Avoid unnecessary tokenization (#12310)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
2025-01-22 19:08:31 +08:00
parent 68ad4e3a8d
commit cd7b6f0857
9 changed files with 71 additions and 40 deletions
--- a/vllm/model_executor/models/deepseek_vl2.py
+++ b/vllm/model_executor/models/deepseek_vl2.py
@@ -249,8 +249,10 @@ class DeepseekVL2MultiModalProcessor(
        hf_processor_mm_kwargs: Mapping[str, object],
        out_mm_kwargs: MultiModalKwargs,
    ) -> list[PromptReplacement]:
-        hf_processor = self.info.get_hf_processor()
-        image_token_id: int = hf_processor.image_token_id
+        hf_processor = self.info.get_hf_processor(**hf_processor_mm_kwargs)
+
+        image_token_id = hf_processor.image_token_id
+        assert isinstance(image_token_id, int)

        def get_replacement_deepseek_vl2(item_idx: int):
            images = mm_items.get_items(