[Core] More fixes to MultiModalEmbeddings type handling (#19715)
Signed-off-by: Russell Bryant <rbryant@redhat.com>
This commit is contained in:
@@ -805,7 +805,8 @@ class Qwen2_5OmniThinkerForConditionalGeneration(
|
||||
multimodal_embeddings: Optional[MultiModalEmbeddings] = None,
|
||||
) -> torch.Tensor:
|
||||
inputs_embeds = self.language_model.get_input_embeddings(input_ids)
|
||||
if multimodal_embeddings is not None:
|
||||
if multimodal_embeddings is not None \
|
||||
and len(multimodal_embeddings) != 0:
|
||||
|
||||
# TODO (ywang96): support overlapping modality embeddings so that
|
||||
# `use_audio_in_video` will work on V1.
|
||||
@@ -845,7 +846,7 @@ class Qwen2_5OmniThinkerForConditionalGeneration(
|
||||
multimodal_embeddings: Optional[NestedTensors] = None,
|
||||
) -> torch.Tensor:
|
||||
inputs_embeds = self.language_model.get_input_embeddings(input_ids)
|
||||
if multimodal_embeddings is None:
|
||||
if multimodal_embeddings is None or len(multimodal_embeddings) == 0:
|
||||
return inputs_embeds
|
||||
|
||||
for embeddings, modality in multimodal_embeddings:
|
||||
|
||||
Reference in New Issue
Block a user