Update Optional[x] -> x | None and Union[x, y] -> x | y (#26633)
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
@@ -3,7 +3,7 @@
 import itertools
 from collections.abc import Mapping, Sequence
 from functools import partial
-from typing import Annotated, Any, Literal, Optional, Union
+from typing import Annotated, Any, Literal, TypeAlias
 
 import numpy as np
 import torch
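The rewrite applied throughout this file is PEP 604 union syntax, which removes the need to import Optional and Union at all. A minimal before/after sketch with hypothetical names, assuming Python 3.10+ (older interpreters need "from __future__ import annotations" for the annotation-only cases):

# Before: typing-module spellings
from typing import Optional, Union

def scale_old(x: Optional[float], factor: Union[int, float]) -> Optional[float]:
    return None if x is None else x * factor

# After: PEP 604 unions, no typing imports required
def scale_new(x: float | None, factor: int | float) -> float | None:
    return None if x is None else x * factor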
@@ -73,7 +73,7 @@ def split_thw(grid_thw: torch.Tensor) -> torch.Tensor:
 
 
 def get_num_patches(
-    grid_thw: torch.Tensor, num_frames: Union[list[int], torch.Tensor]
+    grid_thw: torch.Tensor, num_frames: list[int] | torch.Tensor
 ) -> list[int]:
     """
     Return num_patches per video.
@@ -153,7 +153,9 @@ class KeyeVL1_5ImageEmbeddingInputs(TensorSchema):
     image_grid_thw: Annotated[torch.Tensor, TensorShape("ni", 3)]
 
 
-KeyeVL1_5ImageInputs = Union[KeyeVL1_5ImagePixelInputs, KeyeVL1_5ImageEmbeddingInputs]
+KeyeVL1_5ImageInputs: TypeAlias = (
+    KeyeVL1_5ImagePixelInputs | KeyeVL1_5ImageEmbeddingInputs
+)
 
 
 class KeyeVL1_5VideoPixelInputs(TensorSchema):
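The replacement alias also switches to PEP 613's explicit TypeAlias marker. Since "A | B" between two classes builds a types.UnionType object at runtime on Python 3.10+, Union[...] is unnecessary even in this non-annotation position. A minimal sketch with hypothetical names:

from typing import TypeAlias

class PixelInputs: ...
class EmbeddingInputs: ...

# PEP 613: the TypeAlias annotation tells type checkers this module-level
# assignment defines a type alias rather than an ordinary variable.
ImageInputs: TypeAlias = PixelInputs | EmbeddingInputs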
@@ -191,7 +193,9 @@ class KeyeVL1_5VideoEmbeddingInputs(TensorSchema):
     num_frames: torch.Tensor
 
 
-KeyeVL1_5VideoInputs = Union[KeyeVL1_5VideoPixelInputs, KeyeVL1_5VideoEmbeddingInputs]
+KeyeVL1_5VideoInputs: TypeAlias = (
+    KeyeVL1_5VideoPixelInputs | KeyeVL1_5VideoEmbeddingInputs
+)
 
 
 class KeyeVL1_5Projector(nn.Module):
@@ -199,7 +203,7 @@ class KeyeVL1_5Projector(nn.Module):
         self,
         text_config: PretrainedConfig,
         vision_config: PretrainedConfig,
-        quant_config: Optional[QuantizationConfig] = None,
+        quant_config: QuantizationConfig | None = None,
         prefix: str = "",
     ):
         super().__init__()
@@ -233,9 +237,9 @@ class KeyeVL1_5Projector(nn.Module):
 
     def forward(
         self,
-        image_features: Union[torch.Tensor, tuple[torch.Tensor], list[torch.Tensor]],
+        image_features: torch.Tensor | tuple[torch.Tensor] | list[torch.Tensor],
         image_grid_thw: list[tuple[int, int, int]],
-    ) -> Union[torch.Tensor, list[torch.Tensor]]:
+    ) -> torch.Tensor | list[torch.Tensor]:
         m1, m2 = self.merge_kernel_size
         if isinstance(image_features, (list, tuple)):
             processed_features = list()
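Only the annotations change in this hunk; the runtime check isinstance(image_features, (list, tuple)) is left alone. As an aside (not part of this diff), Python 3.10+ also accepts the "|" form at runtime:

x: list[int] | tuple[int, ...] = [1, 2, 3]

# Equivalent checks on Python 3.10+; X | Y evaluates to a types.UnionType,
# which isinstance accepts just like a tuple of types.
assert isinstance(x, (list, tuple))
assert isinstance(x, list | tuple)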
@@ -275,7 +279,7 @@ class KeyeVL1_5ProcessingInfo(KeyeProcessingInfo):
 
     def get_supported_mm_limits(
         self,
-    ) -> Mapping[str, Optional[int]]:
+    ) -> Mapping[str, int | None]:
         return {"image": None, "video": 1}
 
 
@@ -327,7 +331,7 @@ def _keye_field_config(
 class KeyeVL1_5MultiModalDataParser(MultiModalDataParser):
     def _parse_image_data(
         self,
-        data: Union[dict[str, torch.Tensor], ModalityData[ImageItem]],
+        data: dict[str, torch.Tensor] | ModalityData[ImageItem],
     ) -> ModalityDataItems[Any, Any]:
         if isinstance(data, dict):
             return DictEmbeddingItems(
@@ -344,7 +348,7 @@ class KeyeVL1_5MultiModalDataParser(MultiModalDataParser):
 
     def _parse_video_data(
         self,
-        data: Union[dict[str, torch.Tensor], ModalityData[VideoItem]],
+        data: dict[str, torch.Tensor] | ModalityData[VideoItem],
     ) -> ModalityDataItems[Any, Any]:
         if isinstance(data, dict):
             return DictEmbeddingItems(
@@ -499,7 +503,7 @@ class KeyeVL1_5ForConditionalGeneration(
         self,
         text_config: PretrainedConfig,
         vision_config: PretrainedConfig,
-        quant_config: Optional[QuantizationConfig] = None,
+        quant_config: QuantizationConfig | None = None,
         prefix: str = "",
     ) -> nn.Module:
         return KeyeVL1_5Projector(text_config, vision_config, quant_config, prefix)
@@ -511,7 +515,7 @@ class KeyeVL1_5ForConditionalGeneration(
 
     def _parse_and_validate_image_input(
         self, **kwargs: object
-    ) -> Optional[KeyeVL1_5ImageInputs]:
+    ) -> KeyeVL1_5ImageInputs | None:
         pixel_values = kwargs.pop("pixel_values", None)
         image_embeds = kwargs.pop("image_embeds", None)
         image_grid_thw = kwargs.pop("image_grid_thw", None)
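The new "KeyeVL1_5ImageInputs | None" return reads exactly like the old Optional[...]: None signals that the modality is absent, and callers narrow with an "is None" check. A hypothetical sketch of that pattern:

def parse_input(**kwargs: object) -> dict[str, object] | None:
    # None means "no image kwargs were supplied", not an error,
    # mirroring the _parse_and_validate_*_input shape above.
    pixel_values = kwargs.pop("pixel_values", None)
    if pixel_values is None:
        return None
    return {"pixel_values": pixel_values}

parsed = parse_input(pixel_values=[0.1, 0.2])
if parsed is not None:
    print(list(parsed))  # ['pixel_values']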
@@ -535,7 +539,7 @@ class KeyeVL1_5ForConditionalGeneration(
 
     def _parse_and_validate_video_input(
         self, **kwargs: object
-    ) -> Optional[KeyeVL1_5VideoInputs]:
+    ) -> KeyeVL1_5VideoInputs | None:
         pixel_values_videos = kwargs.pop("pixel_values_videos", None)
         video_embeds = kwargs.pop("video_embeds", None)
         video_grid_thw = kwargs.pop("video_grid_thw", None)
@@ -595,19 +599,19 @@ class KeyeVL1_5ForConditionalGeneration(
         cls,
         input_tokens: list[int],
         hf_config: PretrainedConfig,
-        image_grid_thw: Union[list[list[int]], torch.Tensor],
-        video_grid_thw: Union[list[list[int]], torch.Tensor],
+        image_grid_thw: list[list[int]] | torch.Tensor,
+        video_grid_thw: list[list[int]] | torch.Tensor,
         context_len: int = 0,
-        seq_len: Optional[int] = None,
-        second_per_grid_ts: Optional[list[float]] = None,
-        audio_feature_lengths: Optional[torch.Tensor] = None,
+        seq_len: int | None = None,
+        second_per_grid_ts: list[float] | None = None,
+        audio_feature_lengths: torch.Tensor | None = None,
         use_audio_in_video: bool = False,
     ) -> tuple[torch.Tensor, int]:
         if isinstance(video_grid_thw, list) and len(video_grid_thw) > 0:
             video_grid_thw = video_grid_thw[0]
         """Get mrope input positions and delta value (Keye series)."""
 
-        def split_thw(grid_thw: Union[torch.Tensor, list[int]]) -> list[list[int]]:
+        def split_thw(grid_thw: torch.Tensor | list[int]) -> list[list[int]]:
             """
             Split grid_thw along the t dimension.
 
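The nested split_thw helper takes a union-typed parameter and narrows it at runtime, much like the isinstance guard on video_grid_thw above. A hypothetical normalizer in the same spirit, assuming torch is installed:

import torch

def normalize_grid(grid_thw: list[list[int]] | torch.Tensor) -> list[list[int]]:
    # Accept either spelling of the grid and hand back plain nested
    # lists so downstream code deals with a single shape.
    if isinstance(grid_thw, torch.Tensor):
        return grid_thw.tolist()
    return grid_thw

print(normalize_grid(torch.tensor([[1, 2, 3]])))  # [[1, 2, 3]]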