[V1][Molmo] Fix get_multimodal_embeddings() in molmo.py (#14161)
This commit is contained in:
@@ -692,7 +692,9 @@ class WhisperForConditionalGeneration(nn.Module, SupportsTranscription,
|
||||
)
|
||||
return decoder_outputs
|
||||
|
||||
def get_multimodal_embeddings(self, **kwargs) -> Optional[NestedTensors]:
|
||||
def get_multimodal_embeddings(
|
||||
self, **kwargs
|
||||
) -> Union[list[torch.Tensor], torch.Tensor, tuple[torch.Tensor, ...]]:
|
||||
# TODO: This method does not obey the interface for SupportsMultiModal.
|
||||
# Refactor this once encoder/decoder support is implemented in V1.
|
||||
audio_input = self._parse_and_validate_audio_input(**kwargs)
|
||||
|
||||
Reference in New Issue
Block a user