[Refactor] Move MM data parsing outside processor (#33408)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in:
Cyrus Leung
2026-02-01 00:46:14 +08:00
committed by GitHub
parent 92924b2ddd
commit 88c3e114d8
43 changed files with 228 additions and 139 deletions

View File

@@ -705,7 +705,7 @@ class WhisperMultiModalProcessor(EncDecMultiModalProcessor[WhisperProcessingInfo
def create_encoder_prompt(
self,
prompt: str | list[int],
mm_data: MultiModalDataDict,
mm_items: MultiModalDataItems,
) -> str | list[int]:
# Strictly speaking, whisper encoder only accept audio features.
# We create a dummy encoder prompt here which will be padded to