[Core] [Bugfix] [Multimodal] Fix multimodal profiling and generation for SFT/PTQed models (#20058)

Signed-off-by: Kyle Sayers <kylesayrs@gmail.com>
This commit is contained in:
Kyle Sayers
2025-06-30 13:26:49 -04:00
committed by GitHub
parent 551ef1631a
commit d8cf819a9a
41 changed files with 207 additions and 38 deletions

View File

@@ -260,6 +260,7 @@ class MiniCPMOMultiModalProcessor(
self,
mm_data: Mapping[str, object],
mm_kwargs: Mapping[str, object],
tok_kwargs: Mapping[str, object],
) -> Mapping[str, NestedTensors]:
if (audios := mm_data.get("audios")) is None:
return {}
@@ -276,9 +277,9 @@ class MiniCPMOMultiModalProcessor(
prompts=[self.info.audio_pattern] * len(parsed_audios),
mm_data={"audios": [[audio] for audio in parsed_audios]},
mm_kwargs={
**mm_kwargs,
"chunk_input": True,
**mm_kwargs, "chunk_input": True
},
tok_kwargs=tok_kwargs,
out_keys={"audio_features", "audio_feature_lens"},
)
@@ -302,10 +303,11 @@ class MiniCPMOMultiModalProcessor(
self,
mm_data: Mapping[str, object],
mm_kwargs: Mapping[str, object],
tok_kwargs: Mapping[str, object],
) -> Mapping[str, NestedTensors]:
return {
**super().process_mm_inputs(mm_data, mm_kwargs),
**self.process_audios(mm_data, mm_kwargs),
**super().process_mm_inputs(mm_data, mm_kwargs, tok_kwargs),
**self.process_audios(mm_data, mm_kwargs, tok_kwargs),
}
def _get_prompt_updates(