[Core] Use key-only cache for BaseMultiModalProcessor (#23018)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
2025-08-27 14:19:13 +08:00
parent 8dbf6ed7be
commit 69244e67e6
29 changed files with 954 additions and 394 deletions
--- a/vllm/model_executor/models/phi4mm.py
+++ b/vllm/model_executor/models/phi4mm.py
@@ -27,7 +27,7 @@ from vllm.multimodal.parse import (AudioProcessorItems, ImageEmbeddingItems,
                                   MultiModalDataItems, MultiModalDataParser)
 from vllm.multimodal.processing import (BaseMultiModalProcessor,
                                        BaseProcessingInfo, PromptReplacement,
-                                        PromptUpdate)
+                                        PromptUpdate, ResolvedPromptUpdate)
 from vllm.multimodal.profiling import BaseDummyInputsBuilder
 from vllm.sequence import IntermediateTensors
 from vllm.utils import is_list_of
@@ -850,6 +850,25 @@ class Phi4MMMultiModalProcessor(BaseMultiModalProcessor[Phi4MMProcessingInfo]):
            ),
        ]

+    def _recompute_cached_prompt_update(
+        self,
+        cached_update: ResolvedPromptUpdate,
+        new_item_idx: int,
+    ) -> ResolvedPromptUpdate:
+        new_update = super()._recompute_cached_prompt_update(
+            cached_update,
+            new_item_idx,
+        )
+
+        if cached_update.modality == "image":
+            image_tokens: list[str] = self.info.image_tokens  # type: ignore
+            new_update = new_update.with_target(image_tokens[new_item_idx])
+        elif cached_update.modality == "audio":
+            audio_tokens: list[str] = self.info.audio_tokens  # type: ignore
+            new_update = new_update.with_target(audio_tokens[new_item_idx])
+
+        return new_update
+

@MULTIMODAL_REGISTRY.register_processor(
    Phi4MMMultiModalProcessor,