Update Optional[x] -> x | None and Union[x, y] -> x | y (#26633)
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
This commit is contained in:
@@ -7,7 +7,7 @@
|
||||
# Licensed under The MIT License [see LICENSE for details]
|
||||
# --------------------------------------------------------
|
||||
from collections.abc import Iterable, Mapping, Sequence
|
||||
from typing import Annotated, Literal, Optional, Union
|
||||
from typing import Annotated, Literal, TypeAlias
|
||||
|
||||
import regex as re
|
||||
import torch
|
||||
@@ -111,12 +111,10 @@ class InternS1ImageEmbeddingInputs(TensorSchema):
|
||||
"""
|
||||
|
||||
type: Literal["image_embeds"] = "image_embeds"
|
||||
data: Annotated[
|
||||
Union[torch.Tensor, list[torch.Tensor]], TensorShape("ni", "tifs", "hs")
|
||||
]
|
||||
data: Annotated[torch.Tensor | list[torch.Tensor], TensorShape("ni", "tifs", "hs")]
|
||||
|
||||
|
||||
InternS1ImageInputs = Union[InternS1ImagePixelInputs, InternS1ImageEmbeddingInputs]
|
||||
InternS1ImageInputs: TypeAlias = InternS1ImagePixelInputs | InternS1ImageEmbeddingInputs
|
||||
|
||||
|
||||
class InternS1VideoPixelInputs(TensorSchema):
|
||||
@@ -143,12 +141,10 @@ class InternS1VideoEmbeddingInputs(TensorSchema):
|
||||
"""
|
||||
|
||||
type: Literal["video_embeds"] = "video_embeds"
|
||||
data: Annotated[
|
||||
Union[torch.Tensor, list[torch.Tensor]], TensorShape("nv", "tvfs", "hs")
|
||||
]
|
||||
data: Annotated[torch.Tensor | list[torch.Tensor], TensorShape("nv", "tvfs", "hs")]
|
||||
|
||||
|
||||
InternS1VideoInputs = Union[InternS1VideoPixelInputs, InternS1VideoEmbeddingInputs]
|
||||
InternS1VideoInputs: TypeAlias = InternS1VideoPixelInputs | InternS1VideoEmbeddingInputs
|
||||
|
||||
|
||||
def resolve_interns1_min_max_num(
|
||||
@@ -190,7 +186,7 @@ class InternS1ProcessingInfo(BaseProcessingInfo):
|
||||
)
|
||||
return hf_processor
|
||||
|
||||
def get_supported_mm_limits(self) -> Mapping[str, Optional[int]]:
|
||||
def get_supported_mm_limits(self) -> Mapping[str, int | None]:
|
||||
return {"image": None, "video": None}
|
||||
|
||||
def get_num_image_tokens(
|
||||
@@ -198,7 +194,7 @@ class InternS1ProcessingInfo(BaseProcessingInfo):
|
||||
*,
|
||||
image_width: int,
|
||||
image_height: int,
|
||||
processor: Optional["GotOcr2ImageProcessorFast"] = None,
|
||||
processor: GotOcr2ImageProcessorFast | None = None,
|
||||
) -> int:
|
||||
if processor is None:
|
||||
processor = self.get_hf_processor().image_processor
|
||||
@@ -213,7 +209,7 @@ class InternS1ProcessingInfo(BaseProcessingInfo):
|
||||
num_image_tokens = self.get_hf_processor().image_seq_length * num_image_patches
|
||||
return num_image_tokens
|
||||
|
||||
def resolve_target_ratios(self, use_thumbnail: Optional[bool] = None):
|
||||
def resolve_target_ratios(self, use_thumbnail: bool | None = None):
|
||||
image_processor = self.get_hf_processor().image_processor
|
||||
min_dynamic_patch = image_processor.min_patches
|
||||
max_dynamic_patch = image_processor.max_patches
|
||||
@@ -298,7 +294,7 @@ class InternS1DummyInputsBuilder(BaseDummyInputsBuilder[InternS1ProcessingInfo])
|
||||
self,
|
||||
seq_len: int,
|
||||
mm_counts: Mapping[str, int],
|
||||
mm_options: Optional[Mapping[str, BaseDummyOptions]] = None,
|
||||
mm_options: Mapping[str, BaseDummyOptions] | None = None,
|
||||
) -> MultiModalDataDict:
|
||||
target_width, target_height = self.info.get_image_size_with_most_features()
|
||||
target_num_frames = self.info.get_num_frames_with_most_features(
|
||||
@@ -523,7 +519,7 @@ class InternS1ForConditionalGeneration(
|
||||
)
|
||||
|
||||
@classmethod
|
||||
def get_placeholder_str(cls, modality: str, i: int) -> Optional[str]:
|
||||
def get_placeholder_str(cls, modality: str, i: int) -> str | None:
|
||||
# transformers InternVLProcessor uses <IMG_CONTEXT> as the separator
|
||||
# refer to https://github.com/huggingface/transformers/blob/f90de364c2484c7c325bbe05befdcf487bd75b63/src/transformers/models/internvl/processing_internvl.py#L116
|
||||
if modality.startswith("image"):
|
||||
@@ -576,7 +572,7 @@ class InternS1ForConditionalGeneration(
|
||||
def _init_vision_model(
|
||||
self,
|
||||
config: PretrainedConfig,
|
||||
quant_config: Optional[QuantizationConfig],
|
||||
quant_config: QuantizationConfig | None,
|
||||
*,
|
||||
prefix: str,
|
||||
):
|
||||
@@ -620,7 +616,7 @@ class InternS1ForConditionalGeneration(
|
||||
|
||||
def _parse_and_validate_image_input(
|
||||
self, **kwargs: object
|
||||
) -> Optional[InternS1ImageInputs]:
|
||||
) -> InternS1ImageInputs | None:
|
||||
pixel_values = kwargs.pop("pixel_values", None)
|
||||
image_num_patches = kwargs.pop("image_num_patches", None)
|
||||
image_embeds = kwargs.pop("image_embeds", None)
|
||||
@@ -654,7 +650,7 @@ class InternS1ForConditionalGeneration(
|
||||
|
||||
def _parse_and_validate_video_input(
|
||||
self, **kwargs: object
|
||||
) -> Optional[InternS1VideoInputs]:
|
||||
) -> InternS1VideoInputs | None:
|
||||
pixel_values_flat_video = kwargs.pop("pixel_values_videos", None)
|
||||
video_num_patches = kwargs.pop("video_num_patches", None)
|
||||
video_embeds = kwargs.pop("video_embeds", None)
|
||||
@@ -688,7 +684,7 @@ class InternS1ForConditionalGeneration(
|
||||
|
||||
def _process_vision_input(
|
||||
self,
|
||||
image_input: Union[InternS1ImageInputs, InternS1VideoInputs],
|
||||
image_input: InternS1ImageInputs | InternS1VideoInputs,
|
||||
) -> tuple[torch.Tensor, ...]:
|
||||
if (
|
||||
image_input["type"] == "image_embeds"
|
||||
@@ -763,9 +759,9 @@ class InternS1ForConditionalGeneration(
|
||||
def get_input_embeddings(
|
||||
self,
|
||||
input_ids: torch.Tensor,
|
||||
multimodal_embeddings: Optional[MultiModalEmbeddings] = None,
|
||||
multimodal_embeddings: MultiModalEmbeddings | None = None,
|
||||
*,
|
||||
is_multimodal: Optional[torch.Tensor] = None,
|
||||
is_multimodal: torch.Tensor | None = None,
|
||||
handle_oov_mm_token: bool = False,
|
||||
) -> torch.Tensor:
|
||||
if multimodal_embeddings is not None and len(multimodal_embeddings) > 0:
|
||||
@@ -786,8 +782,8 @@ class InternS1ForConditionalGeneration(
|
||||
self,
|
||||
input_ids: torch.Tensor,
|
||||
positions: torch.Tensor,
|
||||
intermediate_tensors: Optional[IntermediateTensors] = None,
|
||||
inputs_embeds: Optional[torch.Tensor] = None,
|
||||
intermediate_tensors: IntermediateTensors | None = None,
|
||||
inputs_embeds: torch.Tensor | None = None,
|
||||
**kwargs: object,
|
||||
) -> IntermediateTensors:
|
||||
if intermediate_tensors is not None:
|
||||
@@ -807,7 +803,7 @@ class InternS1ForConditionalGeneration(
|
||||
def compute_logits(
|
||||
self,
|
||||
hidden_states: torch.Tensor,
|
||||
) -> Optional[torch.Tensor]:
|
||||
) -> torch.Tensor | None:
|
||||
return self.language_model.compute_logits(hidden_states)
|
||||
|
||||
def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]:
|
||||
|
||||
Reference in New Issue
Block a user