[Bugfix] Fix granite speech shape validation (#21762)
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
This commit is contained in:
@@ -64,14 +64,15 @@ class GraniteSpeechAudioInputs(TensorSchema):
|
|||||||
|
|
||||||
Dimensions:
|
Dimensions:
|
||||||
- b: Batch size
|
- b: Batch size
|
||||||
- nf: Number of audio features (variable length)
|
- fi: Number of input features from the Mel spectrogram.
|
||||||
|
- fo: Number of output features, i.e. the embedding size.
|
||||||
- 160: Fixed feature dimension for Mel spectrogram features
|
- 160: Fixed feature dimension for Mel spectrogram features
|
||||||
"""
|
"""
|
||||||
|
|
||||||
input_features: Annotated[torch.Tensor, TensorShape("b", "nf", 160)]
|
input_features: Annotated[torch.Tensor, TensorShape("b", "fi", 160)]
|
||||||
"""Audio input features."""
|
"""Audio input features."""
|
||||||
|
|
||||||
input_features_mask: Annotated[torch.Tensor, TensorShape("b", "nf")]
|
input_features_mask: Annotated[torch.Tensor, TensorShape("b", "fo")]
|
||||||
"""Mask for variable length audio features."""
|
"""Mask for variable length audio features."""
|
||||||
|
|
||||||
audio_embed_sizes: Annotated[list[int], TensorShape("b")]
|
audio_embed_sizes: Annotated[list[int], TensorShape("b")]
|
||||||
|
|||||||
Reference in New Issue
Block a user