Update Optional[x] -> x | None and Union[x, y] to x | y (#26633)

Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
Author: Harry Mellor
Date: 2025-10-12 17:51:31 +01:00
Committed by: GitHub
Parent: 9bb38130cb
Commit: 8fcaaf6a16
944 changed files with 9490 additions and 10121 deletions

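The change applied across all 944 files follows PEP 604, available from Python 3.10: Optional[X] becomes X | None, Union[X, Y] becomes X | Y, and module-level aliases built with | gain an explicit TypeAlias annotation (PEP 613). A minimal before/after sketch of the pattern, using illustrative names rather than ones from the diff below:

from typing import TypeAlias

# Before (pre-PEP 604 spellings):
#     from typing import Optional, Union
#     Number = Union[int, float]
#     def scale(x: Optional[int] = None) -> Union[int, float]: ...

# After (Python 3.10+): the | operator replaces Optional and Union,
# and PEP 613 marks the module-level alias explicitly.
Number: TypeAlias = int | float


def scale(x: int | None = None) -> int | float:
    # "int | None" reads as "int or None", exactly Optional[int].
    return 1.0 if x is None else x * 2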

@@ -11,7 +11,7 @@ import copy
 import warnings
 from abc import ABC, abstractmethod
 from collections.abc import Iterable, Mapping, Sequence
-from typing import Annotated, Any, Literal, Optional, TypedDict, TypeVar, Union
+from typing import Annotated, Any, Literal, TypeAlias, TypedDict, TypeVar

 import numpy.typing as npt
 import torch
@@ -110,7 +110,7 @@ class NanoNemotronVLImagePixelInputs(TypedDict):

 class NanoNemotronVLImageEmbeddinInputs(TypedDict):
     type: Literal["image_embeds"]
-    data: Union[torch.Tensor, list[torch.Tensor]]
+    data: torch.Tensor | list[torch.Tensor]
     """
     A tensor of shape `(num_images, total_image_feature_size, hidden_size)`
     or a list of tensors of shape `(total_image_feature_size, hidden_size)`
@@ -119,9 +119,9 @@ class NanoNemotronVLImageEmbeddinInputs(TypedDict):
     """

-NanoNemotronVLImageInputs = Union[
-    NanoNemotronVLImagePixelInputs, NanoNemotronVLImageEmbeddinInputs
-]
+NanoNemotronVLImageInputs: TypeAlias = (
+    NanoNemotronVLImagePixelInputs | NanoNemotronVLImageEmbeddinInputs
+)


 class NanoNemotronVLVideoPixelInputs(TensorSchema):
@@ -148,12 +148,12 @@ class NanoNemotronVLVideoEmbeddingInputs(TensorSchema):
     """

     type: Literal["video_embeds"]
-    data: Annotated[Union[torch.Tensor, list[torch.Tensor]], TensorShape("n", "f", "h")]
+    data: Annotated[torch.Tensor | list[torch.Tensor], TensorShape("n", "f", "h")]

-NanoNemotronVLVideoInputs = Union[
-    NanoNemotronVLVideoPixelInputs, NanoNemotronVLVideoEmbeddingInputs
-]
+NanoNemotronVLVideoInputs: TypeAlias = (
+    NanoNemotronVLVideoPixelInputs | NanoNemotronVLVideoEmbeddingInputs
+)


 def dynamic_preprocess(
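A note on the alias hunks above: a bare assignment like NanoNemotronVLImageInputs = A | B is also an ordinary runtime expression (it evaluates to a types.UnionType instance), so the rewritten aliases carry the explicit PEP 613 TypeAlias annotation to make the aliasing intent unambiguous to readers and type checkers. A small sketch of the distinction, again with illustrative names:

from typing import TypeAlias


class PixelInputs: ...


class EmbedInputs: ...


# Implicit alias: usually inferred by checkers, but indistinguishable at a
# glance from a module-level variable that happens to hold a union object.
ImageInputsImplicit = PixelInputs | EmbedInputs

# Explicit alias (PEP 613): the annotation states this name is a type alias.
ImageInputsExplicit: TypeAlias = PixelInputs | EmbedInputs


def parse(x: ImageInputsExplicit) -> str:
    # The alias can be used anywhere a type annotation is expected.
    return type(x).__name__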
@@ -262,7 +262,7 @@ class BaseNanoNemotronVLProcessor(ABC):
         config: PretrainedConfig,
         tokenizer: AnyTokenizer,
         *args,
-        max_num_tiles: Optional[int] = None,
+        max_num_tiles: int | None = None,
         **kwargs,
     ) -> None:
         super().__init__()
@@ -291,7 +291,7 @@ class BaseNanoNemotronVLProcessor(ABC):
     def get_image_repl(
         self,
         feature_size: int,
-        num_patches: Optional[int],
+        num_patches: int | None,
     ) -> PromptUpdateDetails[str]:
         raise NotImplementedError
@@ -354,7 +354,7 @@ class BaseNanoNemotronVLProcessor(ABC):
             text = [t.replace("<image>", image_repl.full, 1) for t in text]
         return text, image_inputs

-    def _make_batch_input(self, input_item: Optional[Union[Any, list[Any]]] = None):
+    def _make_batch_input(self, input_item: Any | list[Any] | None = None):
         if input_item is None:
             input_item = []
         if not isinstance(input_item, list):
@@ -363,10 +363,10 @@ class BaseNanoNemotronVLProcessor(ABC):

     def __call__(
         self,
-        text: Optional[Union[str, list[str]]] = None,
-        images: Optional[Union[Image.Image, list[Image.Image]]] = None,
-        return_tensors: Optional[Union[str, TensorType]] = None,
-        max_num_tiles: Optional[int] = None,
+        text: str | list[str] | None = None,
+        images: Image.Image | list[Image.Image] | None = None,
+        return_tensors: str | TensorType | None = None,
+        max_num_tiles: int | None = None,
     ) -> BatchFeature:
         # Use default if not provided
         if max_num_tiles is None:
@@ -399,12 +399,12 @@ class NanoNemotronVLProcessor(BaseNanoNemotronVLProcessor):
         config: PretrainedConfig,
         tokenizer: AnyTokenizer,
         *,
-        max_num_tiles: Optional[int] = None,
-        min_dynamic_patch: Optional[int] = None,
-        max_dynamic_patch: Optional[int] = None,
-        dynamic_image_size: Optional[bool] = None,
-        video_token: Optional[str] = None,
-        video_pruning_rate: Optional[float] = None,
+        max_num_tiles: int | None = None,
+        min_dynamic_patch: int | None = None,
+        max_dynamic_patch: int | None = None,
+        dynamic_image_size: bool | None = None,
+        video_token: str | None = None,
+        video_pruning_rate: float | None = None,
     ) -> None:
         super().__init__(
             config=config,
@@ -423,7 +423,7 @@ class NanoNemotronVLProcessor(BaseNanoNemotronVLProcessor):
         return self.video_token_id is not None

     @property
-    def video_token_id(self) -> Optional[int]:
+    def video_token_id(self) -> int | None:
         if self.video_token is None:
             return None
         return self.tokenizer.get_vocab().get(self.video_token, None)
@@ -436,7 +436,7 @@ class NanoNemotronVLProcessor(BaseNanoNemotronVLProcessor):
         self,
         videos: list[npt.NDArray],
         max_num_tiles: int,
-        dynamic_image_size: Optional[bool] = None,
+        dynamic_image_size: bool | None = None,
     ) -> list[torch.Tensor]:
         return [
             video_to_pixel_values(
@@ -453,7 +453,7 @@ class NanoNemotronVLProcessor(BaseNanoNemotronVLProcessor):
         text: list[str],
         videos: list[npt.NDArray],
         max_num_tiles: int,
-        dynamic_image_size: Optional[bool] = None,
+        dynamic_image_size: bool | None = None,
     ):
         if len(videos) == 0 or not self.supports_video:
             video_inputs = {}
@@ -508,12 +508,12 @@ class NanoNemotronVLProcessor(BaseNanoNemotronVLProcessor):

     def __call__(
         self,
-        text: Optional[Union[str, list[str]]] = None,
-        images: Optional[Union[Image.Image, list[Image.Image]]] = None,
-        videos: Optional[Union[npt.NDArray, list[npt.NDArray]]] = None,
-        return_tensors: Optional[Union[str, TensorType]] = None,
-        max_num_tiles: Optional[int] = None,
-        dynamic_image_size: Optional[bool] = None,
+        text: str | list[str] | None = None,
+        images: Image.Image | list[Image.Image] | None = None,
+        videos: npt.NDArray | list[npt.NDArray] | None = None,
+        return_tensors: str | TensorType | None = None,
+        max_num_tiles: int | None = None,
+        dynamic_image_size: bool | None = None,
     ) -> BatchFeature:
         # Use default if not provided
         if max_num_tiles is None:
@@ -545,7 +545,7 @@ class NanoNemotronVLProcessor(BaseNanoNemotronVLProcessor):
     def get_image_repl(
         self,
         feature_size: int,
-        num_patches: Optional[int],
+        num_patches: int | None,
     ) -> PromptUpdateDetails[str]:
         repl_features = IMG_CONTEXT * feature_size
         repl_full = IMG_START + repl_features + IMG_END
@@ -598,7 +598,7 @@ class BaseNanoNemotronVLProcessingInfo(BaseProcessingInfo):
     ) -> BaseNanoNemotronVLProcessor:
         raise NotImplementedError

-    def get_supported_mm_limits(self) -> Mapping[str, Optional[int]]:
+    def get_supported_mm_limits(self) -> Mapping[str, int | None]:
         return {"image": None}

     def get_num_image_tokens(
@@ -607,7 +607,7 @@ class BaseNanoNemotronVLProcessingInfo(BaseProcessingInfo):
         image_width: int,
         image_height: int,
         max_num_tiles: int,
-        processor: Optional[BaseNanoNemotronVLProcessor],
+        processor: BaseNanoNemotronVLProcessor | None,
     ) -> int:
         if processor is None:
             processor = self.get_hf_processor()
@@ -673,10 +673,10 @@ class NanoNemotronVLProcessingInfo(BaseNanoNemotronVLProcessingInfo):
         video_limit = {"video": None} if self.supports_video else {}
         return {**super().get_supported_mm_limits(), **video_limit}

-    def get_video_token(self) -> Optional[str]:
+    def get_video_token(self) -> str | None:
         return IMG_CONTEXT

-    def get_video_pruning_rate(self) -> Optional[float]:
+    def get_video_pruning_rate(self) -> float | None:
         return self.ctx.get_mm_config().video_pruning_rate

     def get_num_frames_with_most_features(
@@ -929,7 +929,7 @@ class NanoNemotronVLDummyInputsBuilder(BaseDummyInputsBuilder[_I]):
         self,
         seq_len: int,
         mm_counts: Mapping[str, int],
-        mm_options: Optional[Mapping[str, BaseDummyOptions]] = None,
+        mm_options: Mapping[str, BaseDummyOptions] | None = None,
     ) -> MultiModalDataDict:
         # Use default max_num_tiles for dummy data generation
         max_num_tiles = 12
@@ -964,7 +964,7 @@ class NanoNemotronVLDummyInputsBuilder(
         self,
         seq_len: int,
         mm_counts: Mapping[str, int],
-        mm_options: Optional[Mapping[str, BaseDummyOptions]] = None,
+        mm_options: Mapping[str, BaseDummyOptions] | None = None,
     ) -> MultiModalDataDict:
         dummy_image = super().get_dummy_mm_data(
             seq_len=seq_len, mm_counts=mm_counts, mm_options=mm_options
@@ -1000,7 +1000,7 @@ class NemotronH_Nano_VL_V2(
     nn.Module, HasInnerState, IsHybrid, SupportsMultiModal, SupportsMultiModalPruning
 ):
     @classmethod
-    def get_placeholder_str(cls, modality: str, i: int) -> Optional[str]:
+    def get_placeholder_str(cls, modality: str, i: int) -> str | None:
         if modality.startswith("image"):
             return "<image>"
         if modality.startswith("video"):
@@ -1097,7 +1097,7 @@ class NemotronH_Nano_VL_V2(
     def _parse_and_validate_image_input(
         self, **kwargs: object
-    ) -> Optional[NanoNemotronVLImageInputs]:
+    ) -> NanoNemotronVLImageInputs | None:
         pixel_values_flat = kwargs.pop("pixel_values_flat", None)
         image_num_patches = kwargs.pop("image_num_patches", None)
         image_embeds = kwargs.pop("image_embeds", None)
@@ -1274,7 +1274,7 @@ class NemotronH_Nano_VL_V2(
     def _parse_and_validate_video_input(
         self, **kwargs: object
-    ) -> Optional[NanoNemotronVLVideoPixelInputs]:
+    ) -> NanoNemotronVLVideoPixelInputs | None:
         pixel_values_flat_video = kwargs.pop("pixel_values_flat_video", None)
         video_num_patches = kwargs.pop("video_num_patches", None)
         video_embeds = kwargs.pop("video_embeds", None)
@@ -1365,10 +1365,10 @@ class NemotronH_Nano_VL_V2(
         self,
         input_ids: torch.Tensor,
         positions: torch.Tensor,
-        intermediate_tensors: Optional[IntermediateTensors] = None,
-        inputs_embeds: Optional[torch.Tensor] = None,
+        intermediate_tensors: IntermediateTensors | None = None,
+        inputs_embeds: torch.Tensor | None = None,
         **kwargs: object,
-    ) -> Union[torch.Tensor, IntermediateTensors]:
+    ) -> torch.Tensor | IntermediateTensors:
         if intermediate_tensors is not None:
             input_ids = None
             inputs_embeds = None
@@ -1396,7 +1396,7 @@ class NemotronH_Nano_VL_V2(
     def compute_logits(
         self,
         hidden_states: torch.Tensor,
-    ) -> Optional[torch.Tensor]:
+    ) -> torch.Tensor | None:
         return self.language_model.compute_logits(hidden_states)

     def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]):