Update Optional[x] -> x | None and Union[x, y] -> x | y (#26633)
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
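
For context, here is the pattern this commit applies throughout the diff below, as a minimal standalone sketch (the function and parameter names are illustrative, not taken from the patched file). The PEP 604 `X | Y` spelling needs Python 3.10+ when annotations are evaluated at runtime; older interpreters can opt in with `from __future__ import annotations`.

    # Before: unions spelled with typing.Optional / typing.Union
    from typing import Optional, Union

    def clip(x: Union[int, float], bound: Optional[float] = None) -> Union[int, float]:
        # Clamp x to bound when one is given, otherwise pass it through.
        return x if bound is None else min(x, bound)

    # After: the equivalent PEP 604 spelling; no typing imports required
    def clip_new(x: int | float, bound: float | None = None) -> int | float:
        return x if bound is None else min(x, bound)
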
@@ -7,7 +7,7 @@
 #!/usr/bin/env python3
 import abc
 import math
-from typing import Any, Literal, Optional, Union
+from typing import Any, Literal

 import numpy as np
 import torch
@@ -221,7 +221,7 @@ class ConformerEncoderLayer(nn.Module):
         pos_k: torch.Tensor,
         pos_v: torch.Tensor,
         mask: torch.Tensor,
-        relative_attention_bias: Optional[Tensor] = None,
+        relative_attention_bias: Tensor | None = None,
     ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]:
         """ConformerEncoder forward.

@@ -329,8 +329,8 @@ class TransformerEncoderBase(abc.ABC, nn.Module):
     def __init__(
         self,
         input_size: int,
-        chunk_size: Union[int, list[int]],
-        left_chunk: Union[int, list[int]],
+        chunk_size: int | list[int],
+        left_chunk: int | list[int],
         attention_dim: int = 256,
         attention_heads: int = 4,
         input_layer: str = "nemo_conv",
@@ -339,12 +339,12 @@ class TransformerEncoderBase(abc.ABC, nn.Module):
         time_reduction: int = 4,
         dropout_rate: float = 0.0,
         padding_idx: int = -1,
-        relative_attention_bias_args: Optional[dict[str, Any]] = None,
+        relative_attention_bias_args: dict[str, Any] | None = None,
         positional_dropout_rate: float = 0.0,
-        nemo_conv_settings: Optional[dict[str, Any]] = None,
+        nemo_conv_settings: dict[str, Any] | None = None,
         conv2d_extra_padding: Literal["feat", "feat_time", "none", True] = "none",
         attention_group_size: int = 1,
-        encoder_embedding_config: Optional[dict[str, Any]] = None,
+        encoder_embedding_config: dict[str, Any] | None = None,
     ) -> None:
         super().__init__()
         self.input_size = input_size
@@ -411,8 +411,8 @@ class TransformerEncoderBase(abc.ABC, nn.Module):
         )

     def compute_lens_change(
-        self, feature_lens: Union[int, torch.Tensor]
-    ) -> Union[int, torch.Tensor]:
+        self, feature_lens: int | torch.Tensor
+    ) -> int | torch.Tensor:
         """feature_lens: int
         return updated feature lens.

@@ -452,8 +452,8 @@ class TransformerEncoderBase(abc.ABC, nn.Module):

     def _chunk_size_selection(
         self,
-        chunk_size: Optional[Union[int, list[int]]] = None,
-        left_chunk: Optional[Union[int, list[int]]] = None,
+        chunk_size: int | list[int] | None = None,
+        left_chunk: int | list[int] | None = None,
     ) -> tuple[int, int]:
         """If chunk size is a list, we will randomly select a chunk size."""

@@ -503,7 +503,7 @@ class TransformerEncoderBase(abc.ABC, nn.Module):

     def _position_embedding(
         self, input_tensor: torch.Tensor
-    ) -> tuple[Optional[torch.Tensor], Optional[torch.Tensor]]:
+    ) -> tuple[torch.Tensor | None, torch.Tensor | None]:
         pos_k = None
         pos_v = None
         if self.relative_attention_bias_layer is None:
@@ -516,8 +516,8 @@ class TransformerEncoderBase(abc.ABC, nn.Module):
         self,
         seq_len: int,
         batch_size: int,
-        chunk_size: Union[int, list[int]],
-        left_chunk: Union[int, list[int]],
+        chunk_size: int | list[int],
+        left_chunk: int | list[int],
     ) -> torch.Tensor:
         chunk_size_train_eff, left_chunk_train_eff = self._chunk_size_selection(
             chunk_size, left_chunk
@@ -540,25 +540,25 @@ class TransformerEncoderBase(abc.ABC, nn.Module):
         self,
         xs_pad: torch.Tensor,
         masks: torch.Tensor,
-        chunk_size_nc: Optional[Union[int, list[int]]] = None,
-        left_chunk_nc: Optional[Union[int, list[int]]] = None,
-    ) -> Union[
+        chunk_size_nc: int | list[int] | None = None,
+        left_chunk_nc: int | list[int] | None = None,
+    ) -> (
         tuple[
             torch.Tensor,
-            Optional[torch.Tensor],
-            Optional[torch.Tensor],
+            torch.Tensor | None,
+            torch.Tensor | None,
             torch.Tensor,
             torch.Tensor,
-        ],
-        tuple[
+        ]
+        | tuple[
             torch.Tensor,
-            Optional[torch.Tensor],
-            Optional[torch.Tensor],
+            torch.Tensor | None,
+            torch.Tensor | None,
             torch.Tensor,
             torch.Tensor,
             torch.Tensor,
-        ],
-    ]:
+        ]
+    ):
         """Forwarding the inputs through the top embedding layers

         Args:
@@ -803,9 +803,9 @@ class ConformerEncoder(TransformerEncoderBase):
     def __init__(  # pylint: disable-all
         self,
         input_size: int,
-        chunk_size: Union[int, list[int]],
-        left_chunk: Union[int, list[int]],
-        num_lang: Optional[int] = None,
+        chunk_size: int | list[int],
+        left_chunk: int | list[int],
+        num_lang: int | None = None,
         attention_dim: int = 256,
         attention_heads: int = 4,
         linear_units: int = 2048,
@@ -832,14 +832,14 @@ class ConformerEncoder(TransformerEncoderBase):
         extra_layer_output_idx: int = -1,
         extra_multi_layer_output_idxs: list[int] = [],  # noqa
         activation_checkpointing: str = "",
-        relative_attention_bias_args: Optional[dict[str, Any]] = None,
+        relative_attention_bias_args: dict[str, Any] | None = None,
         time_reduction: int = 4,
         use_pt_scaled_dot_product_attention: bool = False,
-        nemo_conv_settings: Optional[dict[str, Any]] = None,
+        nemo_conv_settings: dict[str, Any] | None = None,
         conv2d_extra_padding: Literal["feat", "feat_time", "none", True] = "none",
         replication_pad_for_subsample_embedding: bool = False,
         attention_group_size: int = 1,
-        encoder_embedding_config: Optional[dict[str, Any]] = None,
+        encoder_embedding_config: dict[str, Any] | None = None,
     ) -> None:
         super().__init__(
             input_size,
@@ -908,12 +908,12 @@ class ConformerEncoder(TransformerEncoderBase):

     def init_relative_attention_bias(
         self, input_tensor: torch.Tensor
-    ) -> Optional[torch.Tensor]:
+    ) -> torch.Tensor | None:
         if self.relative_attention_bias_layer:
             return self.relative_attention_bias_layer(input_tensor)

     def calculate_hs_mask(
-        self, xs_pad: torch.Tensor, device: torch.device, mask: Optional[torch.Tensor]
+        self, xs_pad: torch.Tensor, device: torch.device, mask: torch.Tensor | None
     ) -> torch.Tensor:
         max_audio_length = xs_pad.shape[1]
         batch_size = xs_pad.shape[0]
@@ -1066,9 +1066,9 @@ class WindowQformer(nn.Module):
     def forward(
         self,
         audio_embed: torch.Tensor,
-        mask: Optional[torch.Tensor],
-        embed_len: Optional[int] = None,
-    ) -> tuple[torch.Tensor, Optional[int]]:
+        mask: torch.Tensor | None,
+        embed_len: int | None = None,
+    ) -> tuple[torch.Tensor, int | None]:
         """forward decoder"""
         # audio_embed: N x T x D => N x D x T

@@ -1224,7 +1224,7 @@ class AudioEmbedding(nn.Module):
     def get_audio_features(
         self,
         input_embeds: torch.Tensor,
-        audio_attention_mask: Optional[torch.Tensor] = None,
+        audio_attention_mask: torch.Tensor | None = None,
         audio_projection_mode: str = "speech",
     ) -> torch.Tensor:
         """
@@ -1278,7 +1278,7 @@ class AudioEmbedding(nn.Module):
     def forward(
         self,
         audio_features: torch.Tensor,
-        audio_attention_mask: Optional[torch.Tensor] = None,
+        audio_attention_mask: torch.Tensor | None = None,
         audio_projection_mode: str = "speech",
     ) -> torch.Tensor:
         """
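
One hunk above (`@@ -540,25 +540,25 @@`) is more than a one-line substitution: a multi-line `Union[tuple[...], tuple[...]]` return annotation becomes `(tuple[...] | tuple[...])`. The enclosing parentheses are required because a `|` expression wrapped across several lines is only legal inside brackets. A minimal sketch of the same shape, with hypothetical names:

    def head_or_head_tail(
        extended: bool,
    ) -> (
        tuple[int, str | None]
        | tuple[int, str | None, float]
    ):
        # Returns a 2-tuple, or a 3-tuple when extended (illustrative only).
        return (0, None, 1.0) if extended else (0, None)
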