[Bugfix] Fix Qwen2.5-Omni and Qwen3-Omni mixed-modality embed regression (#35368)

Signed-off-by: linyueqian <linyueqian@outlook.com>
This commit is contained in:
Yueqian Lin
2026-02-26 06:58:23 -05:00
committed by GitHub
parent 01914445b0
commit c0615a296d
3 changed files with 379 additions and 21 deletions

View File

@@ -1904,15 +1904,17 @@ class Qwen3OmniMoeThinkerForConditionalGeneration(
num_audio,
)
# Default: standard merge (no interleaving)
inputs_embeds = _merge_multimodal_embeddings(
inputs_embeds=inputs_embeds,
# Default: standard merge (no interleaving), same as parent class.
# multimodal_embeddings may have been updated above (deepstack
# main-scale). Use super() to stay consistent with the parent
# implementation and avoid issues seen in Qwen2.5-Omni (#34506).
return super().embed_input_ids(
input_ids,
multimodal_embeddings=multimodal_embeddings,
is_multimodal=is_multimodal,
handle_oov_mm_token=handle_oov_mm_token,
)
return inputs_embeds
def forward(
self,
input_ids: torch.Tensor | None,