[Bugfix] Fix Qwen2.5-Omni/Qwen3-Omni use_audio_in_video with multi-video inputs (#37147)
Signed-off-by: Isotr0py <mozf@mail2.sysu.edu.cn>
This commit is contained in:
@@ -774,9 +774,7 @@ class Qwen2_5OmniThinkerMultiModalProcessor(
         def get_replacement_qwen2_use_audio_in_video(item_idx: int):
             nonlocal audio_in_video_item_idx
 
-            audio_num_features = audio_output_lengths[
-                audio_in_video_item_idx + item_idx
-            ]
+            audio_num_features = audio_output_lengths[audio_in_video_item_idx]
             video_grid_thw = out_mm_data["video_grid_thw"][item_idx]
 
             audio_in_video_item_idx += 1
@@ -1489,9 +1489,7 @@ class Qwen3OmniMoeThinkerMultiModalProcessor(
 
         def get_replacement_qwen2_use_audio_in_video(item_idx: int):
             nonlocal audio_in_video_item_idx
-            audio_num_features = audio_output_lengths[
-                audio_in_video_item_idx + item_idx
-            ]
+            audio_num_features = audio_output_lengths[audio_in_video_item_idx]
             video_grid_thw = out_mm_data["video_grid_thw"][item_idx]
 
             audio_in_video_item_idx += 1
Reference in New Issue
Block a user