Update Optional[x] -> x | None and Union[x, y] -> x | y (#26633)

Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
This commit is contained in:
Harry Mellor
2025-10-12 17:51:31 +01:00
committed by GitHub
parent 9bb38130cb
commit 8fcaaf6a16
944 changed files with 9490 additions and 10121 deletions

View File

@@ -2,7 +2,7 @@
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from collections.abc import Mapping
from typing import Any, Optional, Union, cast
from typing import Any, cast
from typing_extensions import assert_never
@@ -46,9 +46,9 @@ class InputPreprocessor:
def __init__(
self,
model_config: ModelConfig,
tokenizer: Optional[AnyTokenizer],
tokenizer: AnyTokenizer | None,
mm_registry: MultiModalRegistry = MULTIMODAL_REGISTRY,
mm_processor_cache: Optional[BaseMultiModalProcessorCache] = None,
mm_processor_cache: BaseMultiModalProcessorCache | None = None,
) -> None:
super().__init__()
@@ -67,7 +67,7 @@ class InputPreprocessor:
return self.tokenizer
def get_bos_token_id(self) -> Optional[int]:
def get_bos_token_id(self) -> int | None:
if self.tokenizer is None:
logger.warning_once(
"Using None for BOS token id because tokenizer is not initialized"
@@ -76,7 +76,7 @@ class InputPreprocessor:
return self.tokenizer.bos_token_id
def get_eos_token_id(self) -> Optional[int]:
def get_eos_token_id(self) -> int | None:
if self.tokenizer is None:
logger.warning_once(
"Using None for EOS token id because tokenizer is not initialized"
@@ -85,7 +85,7 @@ class InputPreprocessor:
return self.tokenizer.eos_token_id
def get_decoder_start_token_id(self) -> Optional[int]:
def get_decoder_start_token_id(self) -> int | None:
"""
Obtain the decoder start token id employed by an encoder/decoder
model. Returns None for non-encoder/decoder models or if the
@@ -157,7 +157,7 @@ class InputPreprocessor:
def _prepare_decoder_input_ids_for_generation(
self,
decoder_input_ids: Optional[list[int]],
decoder_input_ids: list[int] | None,
) -> list[int]:
"""
Prepares `decoder_input_ids` for generation with encoder-decoder models.
@@ -194,7 +194,7 @@ class InputPreprocessor:
def _get_tokenization_kw(
self,
overrides: Optional[dict[str, Any]] = None,
overrides: dict[str, Any] | None = None,
) -> dict[str, Any]:
kwargs = dict[str, Any]()
@@ -212,7 +212,7 @@ class InputPreprocessor:
def _tokenize_prompt(
self,
prompt: str,
tokenization_kwargs: Optional[dict[str, Any]] = None,
tokenization_kwargs: dict[str, Any] | None = None,
) -> list[int]:
"""
Apply the model's tokenizer to a text prompt, returning the
@@ -251,12 +251,12 @@ class InputPreprocessor:
def _process_multimodal(
self,
prompt: Union[str, list[int]],
prompt: str | list[int],
mm_data: MultiModalDataDict,
mm_processor_kwargs: Optional[Mapping[str, object]],
tokenization_kwargs: Optional[dict[str, Any]] = None,
mm_processor_kwargs: Mapping[str, object] | None,
tokenization_kwargs: dict[str, Any] | None = None,
*,
mm_uuids: Optional[MultiModalUUIDDict] = None,
mm_uuids: MultiModalUUIDDict | None = None,
) -> MultiModalInputs:
"""
Apply the model's multi-modal processor to a multi-modal prompt,
@@ -320,7 +320,7 @@ class InputPreprocessor:
)
def _truncate_inputs(
self, inputs: list[int], tokenization_kwargs: Optional[dict[str, Any]] = None
self, inputs: list[int], tokenization_kwargs: dict[str, Any] | None = None
) -> list[int]:
if (
not tokenization_kwargs
@@ -339,15 +339,15 @@ class InputPreprocessor:
def _process_tokens(
self,
parsed_content: TokensPrompt,
tokenization_kwargs: Optional[dict[str, Any]] = None,
tokenization_kwargs: dict[str, Any] | None = None,
*,
mm_uuids: Optional[MultiModalUUIDDict] = None,
) -> Union[TokenInputs, MultiModalInputs]:
mm_uuids: MultiModalUUIDDict | None = None,
) -> TokenInputs | MultiModalInputs:
prompt_token_ids = self._truncate_inputs(
parsed_content["prompt_token_ids"], tokenization_kwargs
)
inputs: Union[TokenInputs, MultiModalInputs]
inputs: TokenInputs | MultiModalInputs
if self.model_config.is_multimodal_model:
inputs = self._process_multimodal(
prompt_token_ids,
@@ -370,13 +370,13 @@ class InputPreprocessor:
def _process_text(
self,
parsed_content: TextPrompt,
tokenization_kwargs: Optional[dict[str, Any]] = None,
tokenization_kwargs: dict[str, Any] | None = None,
*,
mm_uuids: Optional[MultiModalUUIDDict] = None,
) -> Union[TokenInputs, MultiModalInputs]:
mm_uuids: MultiModalUUIDDict | None = None,
) -> TokenInputs | MultiModalInputs:
prompt_text = parsed_content["prompt"]
inputs: Union[TokenInputs, MultiModalInputs]
inputs: TokenInputs | MultiModalInputs
if self.model_config.is_multimodal_model:
inputs = self._process_multimodal(
prompt_text,
@@ -403,9 +403,9 @@ class InputPreprocessor:
def _prompt_to_llm_inputs(
self,
prompt: SingletonPrompt,
tokenization_kwargs: Optional[dict[str, Any]] = None,
tokenization_kwargs: dict[str, Any] | None = None,
*,
mm_uuids: Optional[MultiModalUUIDDict] = None,
mm_uuids: MultiModalUUIDDict | None = None,
) -> SingletonInputs:
"""
Extract the singleton inputs from a prompt.
@@ -445,7 +445,7 @@ class InputPreprocessor:
def _build_enc_dec_llm_inputs(
self,
encoder_inputs: SingletonInputs,
decoder_inputs: Optional[SingletonInputs],
decoder_inputs: SingletonInputs | None,
) -> EncoderDecoderInputs:
if (
encoder_inputs["type"] == "embeds"
@@ -457,10 +457,8 @@ class InputPreprocessor:
)
# Needed for mypy
encoder_inputs = cast(Union[TokenInputs, MultiModalInputs], encoder_inputs)
decoder_inputs = cast(
Optional[Union[TokenInputs, MultiModalInputs]], decoder_inputs
)
encoder_inputs = cast(TokenInputs | MultiModalInputs, encoder_inputs)
decoder_inputs = cast(TokenInputs | MultiModalInputs | None, decoder_inputs)
if decoder_inputs is None:
if self.model_config.hf_config.model_type == "whisper":
@@ -491,8 +489,8 @@ class InputPreprocessor:
def _split_enc_dec_mm_inputs(
self,
inputs: Union[SingletonInputs, MultiModalEncDecInputs],
decoder_inputs_to_override: Optional[SingletonInputs] = None,
inputs: SingletonInputs | MultiModalEncDecInputs,
decoder_inputs_to_override: SingletonInputs | None = None,
) -> tuple[SingletonInputs, SingletonInputs]:
"""
For encoder/decoder models only:
@@ -509,11 +507,11 @@ class InputPreprocessor:
# Needed for mypy
inputs = cast(
Union[TokenInputs, MultiModalInputs, MultiModalEncDecInputs],
TokenInputs | MultiModalInputs | MultiModalEncDecInputs,
inputs,
)
decoder_inputs_to_override = cast(
Optional[Union[TokenInputs, MultiModalInputs]],
TokenInputs | MultiModalInputs | None,
decoder_inputs_to_override,
)
@@ -553,9 +551,9 @@ class InputPreprocessor:
def _process_encoder_decoder_prompt(
self,
prompt: PromptType,
tokenization_kwargs: Optional[dict[str, Any]] = None,
tokenization_kwargs: dict[str, Any] | None = None,
*,
mm_uuids: Optional[MultiModalUUIDDict] = None,
mm_uuids: MultiModalUUIDDict | None = None,
) -> EncoderDecoderInputs:
"""
For encoder/decoder models only:
@@ -591,7 +589,7 @@ class InputPreprocessor:
instance
"""
encoder_inputs: SingletonInputs
decoder_inputs: Optional[SingletonInputs]
decoder_inputs: SingletonInputs | None
if is_explicit_encoder_decoder_prompt(prompt):
# `cast` is needed for mypy, but not pyright
@@ -633,7 +631,7 @@ class InputPreprocessor:
) -> DecoderOnlyInputs:
if "prompt_token_ids" in prompt_inputs:
prompt_inputs = cast(
Union[TokenInputs, MultiModalInputs], prompt_inputs
TokenInputs | MultiModalInputs, prompt_inputs
) # Needed for mypy
return prompt_inputs
@@ -641,9 +639,9 @@ class InputPreprocessor:
def _process_decoder_only_prompt(
self,
prompt: SingletonPrompt,
tokenization_kwargs: Optional[dict[str, Any]] = None,
tokenization_kwargs: dict[str, Any] | None = None,
*,
mm_uuids: Optional[MultiModalUUIDDict] = None,
mm_uuids: MultiModalUUIDDict | None = None,
) -> DecoderOnlyInputs:
"""
For decoder-only models:
@@ -670,9 +668,9 @@ class InputPreprocessor:
def _preprocess(
self,
prompt: PromptType,
tokenization_kwargs: Optional[dict[str, Any]] = None,
tokenization_kwargs: dict[str, Any] | None = None,
*,
mm_uuids: Optional[MultiModalUUIDDict] = None,
mm_uuids: MultiModalUUIDDict | None = None,
) -> ProcessorInputs:
if self.model_config.is_encoder_decoder:
# Encoder-decoder model requires special mapping of
@@ -699,9 +697,9 @@ class InputPreprocessor:
def preprocess(
self,
prompt: PromptType,
tokenization_kwargs: Optional[dict[str, Any]] = None,
tokenization_kwargs: dict[str, Any] | None = None,
*,
mm_uuids: Optional[MultiModalUUIDDict] = None,
mm_uuids: MultiModalUUIDDict | None = None,
) -> ProcessorInputs:
"""Preprocess the input prompt."""
res = self._preprocess(
@@ -718,7 +716,7 @@ class InputPreprocessor:
return res
def stat_mm_cache(self) -> Optional[MultiModalCacheStats]:
def stat_mm_cache(self) -> MultiModalCacheStats | None:
mm_cache_stats = self.mm_cache_stats
if mm_cache_stats is None:
return None