[Core] More fixes to MultiModalEmbeddings type handling (#19715)

Signed-off-by: Russell Bryant <rbryant@redhat.com>
2025-06-18 18:48:29 -04:00
parent 04fefe7c9a
commit 14fdd21d39
35 changed files with 71 additions and 36 deletions
--- a/vllm/model_executor/models/granite_speech.py
+++ b/vllm/model_executor/models/granite_speech.py
@@ -721,7 +721,8 @@ class GraniteSpeechForConditionalGeneration(
        multimodal_embeddings: Optional[MultiModalEmbeddings] = None,
    ) -> torch.Tensor:
        """Compute the merged LLM / audio embeddings."""
-        if multimodal_embeddings is None:
+        if multimodal_embeddings is None \
+            or len(multimodal_embeddings) == 0:
            return self.language_model.get_input_embeddings(input_ids)

        inputs_embeds = embed_multimodal(