[Core] [Bugfix] [Multimodal] Fix multimodal profiling and generation for SFT/PTQed models (#20058)
Signed-off-by: Kyle Sayers <kylesayrs@gmail.com>
This commit is contained in:
@@ -260,6 +260,7 @@ class MiniCPMOMultiModalProcessor(
|
||||
self,
|
||||
mm_data: Mapping[str, object],
|
||||
mm_kwargs: Mapping[str, object],
|
||||
+ tok_kwargs: Mapping[str, object],
|
||||
) -> Mapping[str, NestedTensors]:
|
||||
if (audios := mm_data.get("audios")) is None:
|
||||
return {}
|
||||
@@ -276,9 +277,9 @@ class MiniCPMOMultiModalProcessor(
|
||||
prompts=[self.info.audio_pattern] * len(parsed_audios),
|
||||
mm_data={"audios": [[audio] for audio in parsed_audios]},
|
||||
mm_kwargs={
|
||||
- **mm_kwargs,
|
||||
- "chunk_input": True,
|
||||
+ **mm_kwargs, "chunk_input": True
|
||||
},
|
||||
+ tok_kwargs=tok_kwargs,
|
||||
out_keys={"audio_features", "audio_feature_lens"},
|
||||
)
|
||||
|
||||
@@ -302,10 +303,11 @@ class MiniCPMOMultiModalProcessor(
|
||||
self,
|
||||
mm_data: Mapping[str, object],
|
||||
mm_kwargs: Mapping[str, object],
|
||||
+ tok_kwargs: Mapping[str, object],
|
||||
) -> Mapping[str, NestedTensors]:
|
||||
return {
|
||||
- **super().process_mm_inputs(mm_data, mm_kwargs),
|
||||
- **self.process_audios(mm_data, mm_kwargs),
|
||||
+ **super().process_mm_inputs(mm_data, mm_kwargs, tok_kwargs),
|
||||
+ **self.process_audios(mm_data, mm_kwargs, tok_kwargs),
|
||||
}
|
||||
|
||||
def _get_prompt_updates(
|
||||
|
||||
Reference in New Issue
Block a user