Update Optional[x] -> x | None and Union[x, y] to x | y (#26633)
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
This commit is contained in:
@@ -2,7 +2,7 @@
|
||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||
|
||||
from collections.abc import Mapping
|
||||
from typing import Any, Optional, Union, cast
|
||||
from typing import Any, cast
|
||||
|
||||
from typing_extensions import assert_never
|
||||
|
||||
@@ -46,9 +46,9 @@ class InputPreprocessor:
|
||||
def __init__(
|
||||
self,
|
||||
model_config: ModelConfig,
|
||||
tokenizer: Optional[AnyTokenizer],
|
||||
tokenizer: AnyTokenizer | None,
|
||||
mm_registry: MultiModalRegistry = MULTIMODAL_REGISTRY,
|
||||
mm_processor_cache: Optional[BaseMultiModalProcessorCache] = None,
|
||||
mm_processor_cache: BaseMultiModalProcessorCache | None = None,
|
||||
) -> None:
|
||||
super().__init__()
|
||||
|
||||
@@ -67,7 +67,7 @@ class InputPreprocessor:
|
||||
|
||||
return self.tokenizer
|
||||
|
||||
def get_bos_token_id(self) -> Optional[int]:
|
||||
def get_bos_token_id(self) -> int | None:
|
||||
if self.tokenizer is None:
|
||||
logger.warning_once(
|
||||
"Using None for BOS token id because tokenizer is not initialized"
|
||||
@@ -76,7 +76,7 @@ class InputPreprocessor:
|
||||
|
||||
return self.tokenizer.bos_token_id
|
||||
|
||||
def get_eos_token_id(self) -> Optional[int]:
|
||||
def get_eos_token_id(self) -> int | None:
|
||||
if self.tokenizer is None:
|
||||
logger.warning_once(
|
||||
"Using None for EOS token id because tokenizer is not initialized"
|
||||
@@ -85,7 +85,7 @@ class InputPreprocessor:
|
||||
|
||||
return self.tokenizer.eos_token_id
|
||||
|
||||
def get_decoder_start_token_id(self) -> Optional[int]:
|
||||
def get_decoder_start_token_id(self) -> int | None:
|
||||
"""
|
||||
Obtain the decoder start token id employed by an encoder/decoder
|
||||
model. Returns None for non-encoder/decoder models or if the
|
||||
@@ -157,7 +157,7 @@ class InputPreprocessor:
|
||||
|
||||
def _prepare_decoder_input_ids_for_generation(
|
||||
self,
|
||||
decoder_input_ids: Optional[list[int]],
|
||||
decoder_input_ids: list[int] | None,
|
||||
) -> list[int]:
|
||||
"""
|
||||
Prepares `decoder_input_ids` for generation with encoder-decoder models.
|
||||
@@ -194,7 +194,7 @@ class InputPreprocessor:
|
||||
|
||||
def _get_tokenization_kw(
|
||||
self,
|
||||
overrides: Optional[dict[str, Any]] = None,
|
||||
overrides: dict[str, Any] | None = None,
|
||||
) -> dict[str, Any]:
|
||||
kwargs = dict[str, Any]()
|
||||
|
||||
@@ -212,7 +212,7 @@ class InputPreprocessor:
|
||||
def _tokenize_prompt(
|
||||
self,
|
||||
prompt: str,
|
||||
tokenization_kwargs: Optional[dict[str, Any]] = None,
|
||||
tokenization_kwargs: dict[str, Any] | None = None,
|
||||
) -> list[int]:
|
||||
"""
|
||||
Apply the model's tokenizer to a text prompt, returning the
|
||||
@@ -251,12 +251,12 @@ class InputPreprocessor:
|
||||
|
||||
def _process_multimodal(
|
||||
self,
|
||||
prompt: Union[str, list[int]],
|
||||
prompt: str | list[int],
|
||||
mm_data: MultiModalDataDict,
|
||||
mm_processor_kwargs: Optional[Mapping[str, object]],
|
||||
tokenization_kwargs: Optional[dict[str, Any]] = None,
|
||||
mm_processor_kwargs: Mapping[str, object] | None,
|
||||
tokenization_kwargs: dict[str, Any] | None = None,
|
||||
*,
|
||||
mm_uuids: Optional[MultiModalUUIDDict] = None,
|
||||
mm_uuids: MultiModalUUIDDict | None = None,
|
||||
) -> MultiModalInputs:
|
||||
"""
|
||||
Apply the model's multi-modal processor to a multi-modal prompt,
|
||||
@@ -320,7 +320,7 @@ class InputPreprocessor:
|
||||
)
|
||||
|
||||
def _truncate_inputs(
|
||||
self, inputs: list[int], tokenization_kwargs: Optional[dict[str, Any]] = None
|
||||
self, inputs: list[int], tokenization_kwargs: dict[str, Any] | None = None
|
||||
) -> list[int]:
|
||||
if (
|
||||
not tokenization_kwargs
|
||||
@@ -339,15 +339,15 @@ class InputPreprocessor:
|
||||
def _process_tokens(
|
||||
self,
|
||||
parsed_content: TokensPrompt,
|
||||
tokenization_kwargs: Optional[dict[str, Any]] = None,
|
||||
tokenization_kwargs: dict[str, Any] | None = None,
|
||||
*,
|
||||
mm_uuids: Optional[MultiModalUUIDDict] = None,
|
||||
) -> Union[TokenInputs, MultiModalInputs]:
|
||||
mm_uuids: MultiModalUUIDDict | None = None,
|
||||
) -> TokenInputs | MultiModalInputs:
|
||||
prompt_token_ids = self._truncate_inputs(
|
||||
parsed_content["prompt_token_ids"], tokenization_kwargs
|
||||
)
|
||||
|
||||
inputs: Union[TokenInputs, MultiModalInputs]
|
||||
inputs: TokenInputs | MultiModalInputs
|
||||
if self.model_config.is_multimodal_model:
|
||||
inputs = self._process_multimodal(
|
||||
prompt_token_ids,
|
||||
@@ -370,13 +370,13 @@ class InputPreprocessor:
|
||||
def _process_text(
|
||||
self,
|
||||
parsed_content: TextPrompt,
|
||||
tokenization_kwargs: Optional[dict[str, Any]] = None,
|
||||
tokenization_kwargs: dict[str, Any] | None = None,
|
||||
*,
|
||||
mm_uuids: Optional[MultiModalUUIDDict] = None,
|
||||
) -> Union[TokenInputs, MultiModalInputs]:
|
||||
mm_uuids: MultiModalUUIDDict | None = None,
|
||||
) -> TokenInputs | MultiModalInputs:
|
||||
prompt_text = parsed_content["prompt"]
|
||||
|
||||
inputs: Union[TokenInputs, MultiModalInputs]
|
||||
inputs: TokenInputs | MultiModalInputs
|
||||
if self.model_config.is_multimodal_model:
|
||||
inputs = self._process_multimodal(
|
||||
prompt_text,
|
||||
@@ -403,9 +403,9 @@ class InputPreprocessor:
|
||||
def _prompt_to_llm_inputs(
|
||||
self,
|
||||
prompt: SingletonPrompt,
|
||||
tokenization_kwargs: Optional[dict[str, Any]] = None,
|
||||
tokenization_kwargs: dict[str, Any] | None = None,
|
||||
*,
|
||||
mm_uuids: Optional[MultiModalUUIDDict] = None,
|
||||
mm_uuids: MultiModalUUIDDict | None = None,
|
||||
) -> SingletonInputs:
|
||||
"""
|
||||
Extract the singleton inputs from a prompt.
|
||||
@@ -445,7 +445,7 @@ class InputPreprocessor:
|
||||
def _build_enc_dec_llm_inputs(
|
||||
self,
|
||||
encoder_inputs: SingletonInputs,
|
||||
decoder_inputs: Optional[SingletonInputs],
|
||||
decoder_inputs: SingletonInputs | None,
|
||||
) -> EncoderDecoderInputs:
|
||||
if (
|
||||
encoder_inputs["type"] == "embeds"
|
||||
@@ -457,10 +457,8 @@ class InputPreprocessor:
|
||||
)
|
||||
|
||||
# Needed for mypy
|
||||
encoder_inputs = cast(Union[TokenInputs, MultiModalInputs], encoder_inputs)
|
||||
decoder_inputs = cast(
|
||||
Optional[Union[TokenInputs, MultiModalInputs]], decoder_inputs
|
||||
)
|
||||
encoder_inputs = cast(TokenInputs | MultiModalInputs, encoder_inputs)
|
||||
decoder_inputs = cast(TokenInputs | MultiModalInputs | None, decoder_inputs)
|
||||
|
||||
if decoder_inputs is None:
|
||||
if self.model_config.hf_config.model_type == "whisper":
|
||||
@@ -491,8 +489,8 @@ class InputPreprocessor:
|
||||
|
||||
def _split_enc_dec_mm_inputs(
|
||||
self,
|
||||
inputs: Union[SingletonInputs, MultiModalEncDecInputs],
|
||||
decoder_inputs_to_override: Optional[SingletonInputs] = None,
|
||||
inputs: SingletonInputs | MultiModalEncDecInputs,
|
||||
decoder_inputs_to_override: SingletonInputs | None = None,
|
||||
) -> tuple[SingletonInputs, SingletonInputs]:
|
||||
"""
|
||||
For encoder/decoder models only:
|
||||
@@ -509,11 +507,11 @@ class InputPreprocessor:
|
||||
|
||||
# Needed for mypy
|
||||
inputs = cast(
|
||||
Union[TokenInputs, MultiModalInputs, MultiModalEncDecInputs],
|
||||
TokenInputs | MultiModalInputs | MultiModalEncDecInputs,
|
||||
inputs,
|
||||
)
|
||||
decoder_inputs_to_override = cast(
|
||||
Optional[Union[TokenInputs, MultiModalInputs]],
|
||||
TokenInputs | MultiModalInputs | None,
|
||||
decoder_inputs_to_override,
|
||||
)
|
||||
|
||||
@@ -553,9 +551,9 @@ class InputPreprocessor:
|
||||
def _process_encoder_decoder_prompt(
|
||||
self,
|
||||
prompt: PromptType,
|
||||
tokenization_kwargs: Optional[dict[str, Any]] = None,
|
||||
tokenization_kwargs: dict[str, Any] | None = None,
|
||||
*,
|
||||
mm_uuids: Optional[MultiModalUUIDDict] = None,
|
||||
mm_uuids: MultiModalUUIDDict | None = None,
|
||||
) -> EncoderDecoderInputs:
|
||||
"""
|
||||
For encoder/decoder models only:
|
||||
@@ -591,7 +589,7 @@ class InputPreprocessor:
|
||||
instance
|
||||
"""
|
||||
encoder_inputs: SingletonInputs
|
||||
decoder_inputs: Optional[SingletonInputs]
|
||||
decoder_inputs: SingletonInputs | None
|
||||
|
||||
if is_explicit_encoder_decoder_prompt(prompt):
|
||||
# `cast` is needed for mypy, but not pyright
|
||||
@@ -633,7 +631,7 @@ class InputPreprocessor:
|
||||
) -> DecoderOnlyInputs:
|
||||
if "prompt_token_ids" in prompt_inputs:
|
||||
prompt_inputs = cast(
|
||||
Union[TokenInputs, MultiModalInputs], prompt_inputs
|
||||
TokenInputs | MultiModalInputs, prompt_inputs
|
||||
) # Needed for mypy
|
||||
|
||||
return prompt_inputs
|
||||
@@ -641,9 +639,9 @@ class InputPreprocessor:
|
||||
def _process_decoder_only_prompt(
|
||||
self,
|
||||
prompt: SingletonPrompt,
|
||||
tokenization_kwargs: Optional[dict[str, Any]] = None,
|
||||
tokenization_kwargs: dict[str, Any] | None = None,
|
||||
*,
|
||||
mm_uuids: Optional[MultiModalUUIDDict] = None,
|
||||
mm_uuids: MultiModalUUIDDict | None = None,
|
||||
) -> DecoderOnlyInputs:
|
||||
"""
|
||||
For decoder-only models:
|
||||
@@ -670,9 +668,9 @@ class InputPreprocessor:
|
||||
def _preprocess(
|
||||
self,
|
||||
prompt: PromptType,
|
||||
tokenization_kwargs: Optional[dict[str, Any]] = None,
|
||||
tokenization_kwargs: dict[str, Any] | None = None,
|
||||
*,
|
||||
mm_uuids: Optional[MultiModalUUIDDict] = None,
|
||||
mm_uuids: MultiModalUUIDDict | None = None,
|
||||
) -> ProcessorInputs:
|
||||
if self.model_config.is_encoder_decoder:
|
||||
# Encoder-decoder model requires special mapping of
|
||||
@@ -699,9 +697,9 @@ class InputPreprocessor:
|
||||
def preprocess(
|
||||
self,
|
||||
prompt: PromptType,
|
||||
tokenization_kwargs: Optional[dict[str, Any]] = None,
|
||||
tokenization_kwargs: dict[str, Any] | None = None,
|
||||
*,
|
||||
mm_uuids: Optional[MultiModalUUIDDict] = None,
|
||||
mm_uuids: MultiModalUUIDDict | None = None,
|
||||
) -> ProcessorInputs:
|
||||
"""Preprocess the input prompt."""
|
||||
res = self._preprocess(
|
||||
@@ -718,7 +716,7 @@ class InputPreprocessor:
|
||||
|
||||
return res
|
||||
|
||||
def stat_mm_cache(self) -> Optional[MultiModalCacheStats]:
|
||||
def stat_mm_cache(self) -> MultiModalCacheStats | None:
|
||||
mm_cache_stats = self.mm_cache_stats
|
||||
if mm_cache_stats is None:
|
||||
return None
|
||||
|
||||
Reference in New Issue
Block a user