[Refactor] Move MM data parsing outside processor (#33408)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
2026-02-01 00:46:14 +08:00
parent 92924b2ddd
commit 88c3e114d8
43 changed files with 228 additions and 139 deletions
--- a/vllm/model_executor/models/whisper.py
+++ b/vllm/model_executor/models/whisper.py
@@ -705,7 +705,7 @@ class WhisperMultiModalProcessor(EncDecMultiModalProcessor[WhisperProcessingInfo
    def create_encoder_prompt(
        self,
        prompt: str | list[int],
-        mm_data: MultiModalDataDict,
+        mm_items: MultiModalDataItems,
    ) -> str | list[int]:
        # Strictly speaking, whisper encoder only accept audio features.
        # We create a dummy encoder prompt here which will be padded to