[Core] [Bugfix] [Multimodal] Fix multimodal profiling and generation for SFT/PTQed models (#20058)
Signed-off-by: Kyle Sayers <kylesayrs@gmail.com>
This commit is contained in:
@@ -244,6 +244,7 @@ class Qwen2_5OmniThinkerMultiModalProcessor(
|
||||
prompt: str,
|
||||
mm_data: Mapping[str, object],
|
||||
mm_kwargs: Mapping[str, object],
|
||||
tok_kwargs: Mapping[str, object],
|
||||
) -> BatchFeature:
|
||||
mm_data = dict(mm_data)
|
||||
audios = mm_data.pop("audios", [])
|
||||
@@ -258,6 +259,7 @@ class Qwen2_5OmniThinkerMultiModalProcessor(
|
||||
prompt=prompt,
|
||||
mm_data=mm_data,
|
||||
mm_kwargs=mm_kwargs,
|
||||
tok_kwargs=tok_kwargs,
|
||||
)
|
||||
|
||||
input_features = hf_inputs.pop('input_features', None)
|
||||
@@ -453,6 +455,7 @@ class Qwen2_5OmniThinkerMultiModalProcessor(
|
||||
prompt: Union[str, list[int]],
|
||||
mm_items: MultiModalDataItems,
|
||||
hf_processor_mm_kwargs: Mapping[str, object],
|
||||
tokenization_kwargs: Mapping[str, object],
|
||||
*,
|
||||
enable_hf_prompt_update: bool,
|
||||
) -> tuple[list[int], MultiModalKwargs, bool]:
|
||||
@@ -465,6 +468,7 @@ class Qwen2_5OmniThinkerMultiModalProcessor(
|
||||
prompt_text=prompt,
|
||||
mm_items=mm_items,
|
||||
hf_processor_mm_kwargs=hf_processor_mm_kwargs,
|
||||
tokenization_kwargs=tokenization_kwargs,
|
||||
)
|
||||
tokenizer = self.info.get_tokenizer()
|
||||
prompt_ids = encode_tokens(tokenizer, prompt)
|
||||
@@ -474,6 +478,7 @@ class Qwen2_5OmniThinkerMultiModalProcessor(
|
||||
mm_kwargs = self._apply_hf_processor_mm_only(
|
||||
mm_items=mm_items,
|
||||
hf_processor_mm_kwargs=hf_processor_mm_kwargs,
|
||||
tokenization_kwargs=tokenization_kwargs,
|
||||
)
|
||||
|
||||
return prompt_ids, mm_kwargs, False
|
||||
@@ -482,6 +487,7 @@ class Qwen2_5OmniThinkerMultiModalProcessor(
|
||||
self,
|
||||
mm_items: MultiModalDataItems,
|
||||
hf_processor_mm_kwargs: Mapping[str, object],
|
||||
tokenization_kwargs: Mapping[str, object],
|
||||
) -> MultiModalKwargs:
|
||||
"""
|
||||
Qwen2.5-Omni reimplements this function to handle `use_audio_in_video`.
|
||||
@@ -498,6 +504,7 @@ class Qwen2_5OmniThinkerMultiModalProcessor(
|
||||
prompt_text=self.dummy_inputs.get_dummy_text(mm_counts),
|
||||
mm_items=mm_items,
|
||||
hf_processor_mm_kwargs=hf_processor_mm_kwargs,
|
||||
tokenization_kwargs=tokenization_kwargs,
|
||||
)
|
||||
|
||||
return mm_kwargs
|
||||
|
||||
Reference in New Issue
Block a user