[Core] [Bugfix] [Multimodal] Fix multimodal profiling and generation for SFT/PTQed models (#20058)
Signed-off-by: Kyle Sayers <kylesayrs@gmail.com>
This commit is contained in:
@@ -762,6 +762,7 @@ class Phi4MMMultiModalProcessor(BaseMultiModalProcessor[Phi4MMProcessingInfo]):
|
||||
prompt: str,
|
||||
mm_data: Mapping[str, object],
|
||||
mm_kwargs: Mapping[str, object],
|
||||
+ tok_kwargs: Mapping[str, object],
|
||||
) -> BatchFeature:
|
||||
if not mm_data:
|
||||
prompt_ids = self.info.get_tokenizer().encode(prompt)
|
||||
@@ -773,7 +774,7 @@ class Phi4MMMultiModalProcessor(BaseMultiModalProcessor[Phi4MMProcessingInfo]):
|
||||
mm_data['audios'] = [(data, sr) for data in audio_data]
|
||||
|
||||
processed_outputs = super()._call_hf_processor(prompt, mm_data,
|
||||
- mm_kwargs)
|
||||
+ mm_kwargs, tok_kwargs)
|
||||
|
||||
num_img_tokens = [
|
||||
self.info.get_num_image_tokens(image_width=img_size[0],
|
||||
|
||||
Reference in New Issue
Block a user