[Doc]: fixing doc typos (#24635)
Signed-off-by: Didier Durand <durand.didier@gmail.com>
This commit is contained in:
@@ -846,7 +846,7 @@ class Qwen2_5OmniThinkerForConditionalGeneration(
|
||||
return []
|
||||
|
||||
# The result multimodal_embeddings is tuple of tensors, with each
|
||||
# tensor correspoending to a multimodal data item (image or video).
|
||||
# tensor corresponding to a multimodal data item (image or video).
|
||||
multimodal_embeddings: tuple[torch.Tensor, ...] = ()
|
||||
|
||||
# NOTE: It is important to iterate over the keys in this dictionary
|
||||
@@ -873,7 +873,7 @@ class Qwen2_5OmniThinkerForConditionalGeneration(
|
||||
if multimodal_embeddings is not None \
|
||||
and len(multimodal_embeddings) != 0:
|
||||
|
||||
# TODO (ywang96): support overlapping modalitiy embeddings so that
|
||||
# TODO (ywang96): support overlapping modality embeddings so that
|
||||
# `use_audio_in_video` will work on V1.
|
||||
inputs_embeds = merge_multimodal_embeddings(
|
||||
input_ids, inputs_embeds, multimodal_embeddings, [
|
||||
|
||||
Reference in New Issue
Block a user