[Bugfix] Fix Qwen2.5-Omni/Qwen3-Omni use_audio_in_video with multi-video inputs (#37147)

Signed-off-by: Isotr0py <mozf@mail2.sysu.edu.cn>
This commit is contained in:
Isotr0py
2026-03-16 16:56:06 +08:00
committed by GitHub
parent 52131f88d9
commit 912fbe9555
4 changed files with 117 additions and 17 deletions

View File

@@ -774,9 +774,7 @@ class Qwen2_5OmniThinkerMultiModalProcessor(
def get_replacement_qwen2_use_audio_in_video(item_idx: int):
nonlocal audio_in_video_item_idx
-audio_num_features = audio_output_lengths[
-audio_in_video_item_idx + item_idx
-]
+audio_num_features = audio_output_lengths[audio_in_video_item_idx]
video_grid_thw = out_mm_data["video_grid_thw"][item_idx]
audio_in_video_item_idx += 1

View File

@@ -1489,9 +1489,7 @@ class Qwen3OmniMoeThinkerMultiModalProcessor(
def get_replacement_qwen2_use_audio_in_video(item_idx: int):
nonlocal audio_in_video_item_idx
-audio_num_features = audio_output_lengths[
-audio_in_video_item_idx + item_idx
-]
+audio_num_features = audio_output_lengths[audio_in_video_item_idx]
video_grid_thw = out_mm_data["video_grid_thw"][item_idx]
audio_in_video_item_idx += 1