[Bugfix] Fix Qwen2.5-Omni and Qwen3-Omni mixed-modality embed regression (#35368)
Signed-off-by: linyueqian <linyueqian@outlook.com>
This commit is contained in:
@@ -1904,15 +1904,17 @@ class Qwen3OmniMoeThinkerForConditionalGeneration(
|
||||
num_audio,
|
||||
)
|
||||
|
||||
# Default: standard merge (no interleaving)
|
||||
inputs_embeds = _merge_multimodal_embeddings(
|
||||
inputs_embeds=inputs_embeds,
|
||||
# Default: standard merge (no interleaving), same as parent class.
|
||||
# multimodal_embeddings may have been updated above (deepstack
|
||||
# main-scale). Use super() to stay consistent with the parent
|
||||
# implementation and avoid issues seen in Qwen2.5-Omni (#34506).
|
||||
return super().embed_input_ids(
|
||||
input_ids,
|
||||
multimodal_embeddings=multimodal_embeddings,
|
||||
is_multimodal=is_multimodal,
|
||||
handle_oov_mm_token=handle_oov_mm_token,
|
||||
)
|
||||
|
||||
return inputs_embeds
|
||||
|
||||
def forward(
|
||||
self,
|
||||
input_ids: torch.Tensor | None,
|
||||
|
||||
Reference in New Issue
Block a user