Update Optional[x] -> x | None and Union[x, y] to x | y (#26633)

Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
Author: Harry Mellor
Date: 2025-10-12 17:51:31 +01:00
Committed by: GitHub
Parent: 9bb38130cb
Commit: 8fcaaf6a16
944 changed files with 9490 additions and 10121 deletions
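
The rewrite this commit applies is the mechanical PEP 604 migration: `Optional[X]` becomes `X | None`, `Union[X, Y]` becomes `X | Y`, and the corresponding `typing` imports are dropped. A minimal before/after sketch of the pattern (the function and names are illustrative, not taken from the diff):

    # Before: typing-module generics (illustrative example, not vLLM code)
    from typing import Optional, Union

    def scale(x: Union[int, float], factor: Optional[float] = None) -> float:
        return float(x) * (factor if factor is not None else 1.0)

    # After: PEP 604 union syntax (requires Python 3.10+)
    def scale(x: int | float, factor: float | None = None) -> float:
        return float(x) * (factor if factor is not None else 1.0)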

@@ -3,7 +3,7 @@
 import itertools
 from collections.abc import Mapping, Sequence
 from functools import partial
-from typing import Annotated, Any, Literal, Optional, Union
+from typing import Annotated, Any, Literal, TypeAlias

 import numpy as np
 import torch
@@ -73,7 +73,7 @@ def split_thw(grid_thw: torch.Tensor) -> torch.Tensor:


 def get_num_patches(
-    grid_thw: torch.Tensor, num_frames: Union[list[int], torch.Tensor]
+    grid_thw: torch.Tensor, num_frames: list[int] | torch.Tensor
 ) -> list[int]:
     """
     Return num_patches per video.
@@ -153,7 +153,9 @@ class KeyeVL1_5ImageEmbeddingInputs(TensorSchema):
     image_grid_thw: Annotated[torch.Tensor, TensorShape("ni", 3)]


-KeyeVL1_5ImageInputs = Union[KeyeVL1_5ImagePixelInputs, KeyeVL1_5ImageEmbeddingInputs]
+KeyeVL1_5ImageInputs: TypeAlias = (
+    KeyeVL1_5ImagePixelInputs | KeyeVL1_5ImageEmbeddingInputs
+)


 class KeyeVL1_5VideoPixelInputs(TensorSchema):
@@ -191,7 +193,9 @@ class KeyeVL1_5VideoEmbeddingInputs(TensorSchema):
     num_frames: torch.Tensor


-KeyeVL1_5VideoInputs = Union[KeyeVL1_5VideoPixelInputs, KeyeVL1_5VideoEmbeddingInputs]
+KeyeVL1_5VideoInputs: TypeAlias = (
+    KeyeVL1_5VideoPixelInputs | KeyeVL1_5VideoEmbeddingInputs
+)


 class KeyeVL1_5Projector(nn.Module):
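
The two hunks above do slightly more than swap syntax: because each rewritten alias now spans multiple lines, its right-hand side becomes a parenthesized `|` expression, and the commit adds an explicit `TypeAlias` annotation so type checkers keep treating the assignment as a type alias rather than an ordinary module-level variable. A minimal sketch of the same pattern (the placeholder classes are hypothetical):

    from typing import TypeAlias

    class PixelInputs: ...       # hypothetical stand-ins for the
    class EmbeddingInputs: ...   # TensorSchema classes in the diff

    # The explicit TypeAlias marker makes the intent unambiguous to
    # type checkers, so the alias stays usable in annotations:
    ImageInputs: TypeAlias = (
        PixelInputs | EmbeddingInputs
    )

    def parse(data: ImageInputs) -> None: ...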
@@ -199,7 +203,7 @@ class KeyeVL1_5Projector(nn.Module):
         self,
         text_config: PretrainedConfig,
         vision_config: PretrainedConfig,
-        quant_config: Optional[QuantizationConfig] = None,
+        quant_config: QuantizationConfig | None = None,
         prefix: str = "",
     ):
         super().__init__()
@@ -233,9 +237,9 @@ class KeyeVL1_5Projector(nn.Module):

     def forward(
         self,
-        image_features: Union[torch.Tensor, tuple[torch.Tensor], list[torch.Tensor]],
+        image_features: torch.Tensor | tuple[torch.Tensor] | list[torch.Tensor],
         image_grid_thw: list[tuple[int, int, int]],
-    ) -> Union[torch.Tensor, list[torch.Tensor]]:
+    ) -> torch.Tensor | list[torch.Tensor]:
         m1, m2 = self.merge_kernel_size
         if isinstance(image_features, (list, tuple)):
             processed_features = list()
@@ -275,7 +279,7 @@ class KeyeVL1_5ProcessingInfo(KeyeProcessingInfo):

     def get_supported_mm_limits(
         self,
-    ) -> Mapping[str, Optional[int]]:
+    ) -> Mapping[str, int | None]:
         return {"image": None, "video": 1}
@@ -327,7 +331,7 @@ def _keye_field_config(
 class KeyeVL1_5MultiModalDataParser(MultiModalDataParser):
     def _parse_image_data(
         self,
-        data: Union[dict[str, torch.Tensor], ModalityData[ImageItem]],
+        data: dict[str, torch.Tensor] | ModalityData[ImageItem],
     ) -> ModalityDataItems[Any, Any]:
         if isinstance(data, dict):
             return DictEmbeddingItems(
@@ -344,7 +348,7 @@ class KeyeVL1_5MultiModalDataParser(MultiModalDataParser):

     def _parse_video_data(
         self,
-        data: Union[dict[str, torch.Tensor], ModalityData[VideoItem]],
+        data: dict[str, torch.Tensor] | ModalityData[VideoItem],
     ) -> ModalityDataItems[Any, Any]:
         if isinstance(data, dict):
             return DictEmbeddingItems(
@@ -499,7 +503,7 @@ class KeyeVL1_5ForConditionalGeneration(
         self,
         text_config: PretrainedConfig,
         vision_config: PretrainedConfig,
-        quant_config: Optional[QuantizationConfig] = None,
+        quant_config: QuantizationConfig | None = None,
         prefix: str = "",
     ) -> nn.Module:
         return KeyeVL1_5Projector(text_config, vision_config, quant_config, prefix)
@@ -511,7 +515,7 @@ class KeyeVL1_5ForConditionalGeneration(

     def _parse_and_validate_image_input(
         self, **kwargs: object
-    ) -> Optional[KeyeVL1_5ImageInputs]:
+    ) -> KeyeVL1_5ImageInputs | None:
         pixel_values = kwargs.pop("pixel_values", None)
         image_embeds = kwargs.pop("image_embeds", None)
         image_grid_thw = kwargs.pop("image_grid_thw", None)
@@ -535,7 +539,7 @@ class KeyeVL1_5ForConditionalGeneration(

     def _parse_and_validate_video_input(
         self, **kwargs: object
-    ) -> Optional[KeyeVL1_5VideoInputs]:
+    ) -> KeyeVL1_5VideoInputs | None:
         pixel_values_videos = kwargs.pop("pixel_values_videos", None)
         video_embeds = kwargs.pop("video_embeds", None)
         video_grid_thw = kwargs.pop("video_grid_thw", None)
@@ -595,19 +599,19 @@ class KeyeVL1_5ForConditionalGeneration(
         cls,
         input_tokens: list[int],
         hf_config: PretrainedConfig,
-        image_grid_thw: Union[list[list[int]], torch.Tensor],
-        video_grid_thw: Union[list[list[int]], torch.Tensor],
+        image_grid_thw: list[list[int]] | torch.Tensor,
+        video_grid_thw: list[list[int]] | torch.Tensor,
         context_len: int = 0,
-        seq_len: Optional[int] = None,
-        second_per_grid_ts: Optional[list[float]] = None,
-        audio_feature_lengths: Optional[torch.Tensor] = None,
+        seq_len: int | None = None,
+        second_per_grid_ts: list[float] | None = None,
+        audio_feature_lengths: torch.Tensor | None = None,
         use_audio_in_video: bool = False,
     ) -> tuple[torch.Tensor, int]:
         """Get mrope input positions and delta value (Keye series)."""
         if isinstance(video_grid_thw, list) and len(video_grid_thw) > 0:
             video_grid_thw = video_grid_thw[0]

-        def split_thw(grid_thw: Union[torch.Tensor, list[int]]) -> list[list[int]]:
+        def split_thw(grid_thw: torch.Tensor | list[int]) -> list[list[int]]:
             """
             Split grid_thw along the t dimension.
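
A runtime note on the new syntax: unlike `typing.Optional`, the `|` operator on types is evaluated eagerly and builds a `types.UnionType`, so these annotations require Python 3.10 or newer, and the resulting unions can be passed straight to `isinstance`. A small sketch (assuming Python >= 3.10):

    import types

    # X | Y on classes produces a types.UnionType object at runtime
    NumFrames = list | None
    assert isinstance(NumFrames, types.UnionType)

    # Unions built with | are accepted directly by isinstance
    assert isinstance(None, list | None)
    assert isinstance([1, 2, 3], list | None)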