[Core] Use key-only cache for BaseMultiModalProcessor (#23018)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in:
Cyrus Leung
2025-08-27 14:19:13 +08:00
committed by GitHub
parent 8dbf6ed7be
commit 69244e67e6
29 changed files with 954 additions and 394 deletions

View File

@@ -27,7 +27,7 @@ from vllm.multimodal.parse import (AudioProcessorItems, ImageEmbeddingItems,
MultiModalDataItems, MultiModalDataParser)
from vllm.multimodal.processing import (BaseMultiModalProcessor,
BaseProcessingInfo, PromptReplacement,
PromptUpdate)
PromptUpdate, ResolvedPromptUpdate)
from vllm.multimodal.profiling import BaseDummyInputsBuilder
from vllm.sequence import IntermediateTensors
from vllm.utils import is_list_of
@@ -850,6 +850,25 @@ class Phi4MMMultiModalProcessor(BaseMultiModalProcessor[Phi4MMProcessingInfo]):
),
]
def _recompute_cached_prompt_update(
    self,
    cached_update: ResolvedPromptUpdate,
    new_item_idx: int,
) -> ResolvedPromptUpdate:
    """Rebuild a cached prompt update for a new item index.

    The parent implementation is invoked first. For the ``"image"`` and
    ``"audio"`` modalities, the result is then re-targeted to the entry
    at ``new_item_idx`` of ``self.info.image_tokens`` or
    ``self.info.audio_tokens`` respectively. Any other modality gets the
    parent's result unchanged.

    Args:
        cached_update: The previously resolved update; its ``modality``
            selects which token list (if any) supplies the new target.
        new_item_idx: Index of the item the update should now refer to;
            also used to index into the selected token list.

    Returns:
        The update produced by the parent class, with its target
        replaced for image/audio modalities.
    """
    refreshed = super()._recompute_cached_prompt_update(
        cached_update,
        new_item_idx,
    )

    modality = cached_update.modality
    # NOTE(review): presumably the placeholder token differs per item
    # index, which is why the target has to be swapped -- confirm.
    if modality == "image":
        placeholder_tokens: list[str] = self.info.image_tokens  # type: ignore
    elif modality == "audio":
        placeholder_tokens = self.info.audio_tokens  # type: ignore
    else:
        # No per-index target to swap in for other modalities.
        return refreshed

    return refreshed.with_target(placeholder_tokens[new_item_idx])
@MULTIMODAL_REGISTRY.register_processor(
Phi4MMMultiModalProcessor,