[Core] [Bugfix] [Multimodal] Fix multimodal profiling and generation for SFT/PTQed models (#20058)

Signed-off-by: Kyle Sayers <kylesayrs@gmail.com>
This commit is contained in:
Kyle Sayers
2025-06-30 13:26:49 -04:00
committed by GitHub
parent 551ef1631a
commit d8cf819a9a
41 changed files with 207 additions and 38 deletions

View File

@@ -121,6 +121,7 @@ class PaliGemmaMultiModalProcessor(
prompt: str,
mm_data: Mapping[str, object],
mm_kwargs: Mapping[str, object],
tok_kwargs: Mapping[str, object],
) -> BatchFeature:
tokenizer = self.info.get_tokenizer()
if not mm_data:
@@ -131,6 +132,7 @@ class PaliGemmaMultiModalProcessor(
prompt=prompt,
mm_data=mm_data,
mm_kwargs=mm_kwargs,
tok_kwargs=tok_kwargs,
)
def _get_mm_fields_config(
@@ -191,10 +193,11 @@ class PaliGemmaMultiModalProcessor(
prompt: Union[str, list[int]],
mm_data: MultiModalDataDict,
hf_processor_mm_kwargs: Mapping[str, object],
tokenization_kwargs: Optional[Mapping[str, object]] = None,
return_mm_hashes: bool = False,
) -> MultiModalInputs:
mm_inputs = super().apply(prompt, mm_data, hf_processor_mm_kwargs,
return_mm_hashes)
tokenization_kwargs, return_mm_hashes)
prompt_token_ids = mm_inputs["prompt_token_ids"]
tokenizer = self.info.get_tokenizer()