Update Optional[x] -> x | None and Union[x, y] -> x | y (#26633)

Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
Author: Harry Mellor
Date: 2025-10-12 17:51:31 +01:00
Committed by: GitHub
Parent: 9bb38130cb
Commit: 8fcaaf6a16
944 changed files with 9490 additions and 10121 deletions
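
The change applies PEP 604 syntax (Python 3.10+) mechanically across the tree: `Optional[X]` becomes `X | None`, `Union[X, Y]` becomes `X | Y`, and the now-unused `typing` imports are dropped. An illustrative before/after of the pattern (hypothetical function, not taken from the diff):

    # Before: typing-module spellings
    from typing import Optional, Union

    def load(path: Optional[str], fmt: Union[str, list[str]]) -> Optional[dict]: ...

    # After: PEP 604 unions; the Optional/Union imports are no longer needed
    def load(path: str | None, fmt: str | list[str]) -> dict | None: ...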


@@ -7,15 +7,16 @@ import dataclasses
 import functools
 import json
 import sys
+from collections.abc import Callable
 from dataclasses import MISSING, dataclass, fields, is_dataclass
 from itertools import permutations
+from types import UnionType
 from typing import (
     TYPE_CHECKING,
     Annotated,
     Any,
-    Callable,
     Literal,
-    Optional,
+    TypeAlias,
     TypeVar,
     Union,
     cast,
@@ -98,8 +99,8 @@ logger = init_logger(__name__)

 # object is used to allow for special typing forms
 T = TypeVar("T")
-TypeHint = Union[type[Any], object]
-TypeHintT = Union[type[T], object]
+TypeHint: TypeAlias = type[Any] | object
+TypeHintT: TypeAlias = type[T] | object


 def parse_type(return_type: Callable[[str], T]) -> Callable[[str], T]:
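
Note the new `TypeAlias` annotations: with the `X | Y` spelling, a bare module-level assignment could read as an ordinary variable, and PEP 613's explicit marker keeps type checkers treating `TypeHint` and `TypeHintT` as aliases usable in annotations. A minimal self-contained sketch of the idiom:

    from typing import Any, TypeAlias

    # PEP 613: the annotation marks this as a type alias, not a value
    TypeHint: TypeAlias = type[Any] | object

    def describe(hint: TypeHint) -> str:  # the alias works in annotations
        return repr(hint)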
@@ -114,8 +115,8 @@ def parse_type(return_type: Callable[[str], T]) -> Callable[[str], T]:
     return _parse_type


-def optional_type(return_type: Callable[[str], T]) -> Callable[[str], Optional[T]]:
-    def _optional_type(val: str) -> Optional[T]:
+def optional_type(return_type: Callable[[str], T]) -> Callable[[str], T | None]:
+    def _optional_type(val: str) -> T | None:
         if val == "" or val == "None":
             return None
         return parse_type(return_type)(val)
@@ -123,7 +124,7 @@ def optional_type(return_type: Callable[[str], T]) -> Callable[[str], Optional[T
     return _optional_type


-def union_dict_and_str(val: str) -> Optional[Union[str, dict[str, str]]]:
+def union_dict_and_str(val: str) -> str | dict[str, str] | None:
     if not re.match(r"(?s)^\s*{.*}\s*$", val):
         return str(val)
     return optional_type(json.loads)(val)
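
For orientation, the converters above give argparse this behaviour: `optional_type(int)` maps `""` or `"None"` to `None` and everything else through `int()`, while `union_dict_and_str` returns a dict only for JSON-object-shaped input. A small illustrative check (it restates the code above; it is not part of the diff):

    parse_maybe_int = optional_type(int)
    assert parse_maybe_int("None") is None   # the CLI sentinel for None
    assert parse_maybe_int("8") == 8

    assert union_dict_and_str("facebook/opt-125m") == "facebook/opt-125m"
    assert union_dict_and_str('{"image": "5"}') == {"image": "5"}  # JSON object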
@@ -174,7 +175,8 @@ def get_type_hints(type_hint: TypeHint) -> set[TypeHint]:

     if origin is Annotated:
         type_hints.update(get_type_hints(args[0]))
-    elif origin is Union:
+    elif origin in {Union, UnionType}:
+        # Union for Union[X, Y] and UnionType for X | Y
         for arg in args:
             type_hints.update(get_type_hints(arg))
     else:
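
The widened membership test is needed because the two union spellings differ at runtime: on Python 3.10-3.13, `Union[X, Y]` reports `typing.Union` as its origin while `X | Y` reports `types.UnionType`, so a plain `is Union` check silently skips hints written in the new syntax. A quick illustrative check:

    from types import UnionType
    from typing import Union, get_origin

    assert get_origin(Union[int, str]) is Union   # old spelling
    assert get_origin(int | str) is UnionType     # PEP 604 spelling
    assert get_origin(int | str) is not Union     # why `is Union` alone missed it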
@@ -195,7 +197,7 @@ NEEDS_HELP = (


 @functools.lru_cache(maxsize=30)
-def _compute_kwargs(cls: ConfigType) -> dict[str, Any]:
+def _compute_kwargs(cls: ConfigType) -> dict[str, dict[str, Any]]:
     # Save time only getting attr docs if we're generating help text
     cls_docs = get_attr_docs(cls) if NEEDS_HELP else {}
     kwargs = {}
@@ -262,7 +264,8 @@ def _compute_kwargs(cls: ConfigType) -> dict[str, Any]:
             type_hint = get_type(type_hints, list)
             types = get_args(type_hint)
             list_type = types[0]
-            if get_origin(list_type) is Union:
+            if get_origin(list_type) in {Union, UnionType}:
+                # Union for Union[X, Y] and UnionType for X | Y
                 msg = "List type must contain str if it is a Union."
                 assert str in get_args(list_type), msg
                 list_type = str
@@ -310,7 +313,7 @@ def _compute_kwargs(cls: ConfigType) -> dict[str, Any]:
     return kwargs


-def get_kwargs(cls: ConfigType) -> dict[str, Any]:
+def get_kwargs(cls: ConfigType) -> dict[str, dict[str, Any]]:
     """Return argparse kwargs for the given Config dataclass.

     If `--help` or `mkdocs` are not present in the command line command, the
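
The tightened return annotation documents the actual shape: a mapping from field name to that field's `add_argument` kwargs. Schematically (field names and keys here are illustrative, not taken from the diff):

    # e.g. get_kwargs(SomeConfig) might yield, per field:
    # {
    #     "model": {"type": str, "default": ..., "help": "..."},
    #     "seed": {"type": optional_type(int), "default": None, "help": "..."},
    # }
    # which a CLI layer can apply as:
    #     for name, kw in get_kwargs(SomeConfig).items():
    #         parser.add_argument(f"--{name}", **kw)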
@@ -328,43 +331,43 @@ class EngineArgs:
     """Arguments for vLLM engine."""

     model: str = ModelConfig.model
-    served_model_name: Optional[Union[str, list[str]]] = ModelConfig.served_model_name
-    tokenizer: Optional[str] = ModelConfig.tokenizer
-    hf_config_path: Optional[str] = ModelConfig.hf_config_path
+    served_model_name: str | list[str] | None = ModelConfig.served_model_name
+    tokenizer: str | None = ModelConfig.tokenizer
+    hf_config_path: str | None = ModelConfig.hf_config_path
     runner: RunnerOption = ModelConfig.runner
     convert: ConvertOption = ModelConfig.convert
-    task: Optional[TaskOption] = ModelConfig.task
+    task: TaskOption | None = ModelConfig.task
     skip_tokenizer_init: bool = ModelConfig.skip_tokenizer_init
     enable_prompt_embeds: bool = ModelConfig.enable_prompt_embeds
     tokenizer_mode: TokenizerMode = ModelConfig.tokenizer_mode
     trust_remote_code: bool = ModelConfig.trust_remote_code
     allowed_local_media_path: str = ModelConfig.allowed_local_media_path
-    allowed_media_domains: Optional[list[str]] = ModelConfig.allowed_media_domains
-    download_dir: Optional[str] = LoadConfig.download_dir
+    allowed_media_domains: list[str] | None = ModelConfig.allowed_media_domains
+    download_dir: str | None = LoadConfig.download_dir
     safetensors_load_strategy: str = LoadConfig.safetensors_load_strategy
-    load_format: Union[str, LoadFormats] = LoadConfig.load_format
+    load_format: str | LoadFormats = LoadConfig.load_format
     config_format: str = ModelConfig.config_format
     dtype: ModelDType = ModelConfig.dtype
     kv_cache_dtype: CacheDType = CacheConfig.cache_dtype
-    seed: Optional[int] = ModelConfig.seed
-    max_model_len: Optional[int] = ModelConfig.max_model_len
+    seed: int | None = ModelConfig.seed
+    max_model_len: int | None = ModelConfig.max_model_len
     cuda_graph_sizes: list[int] = get_field(SchedulerConfig, "cuda_graph_sizes")
     # Note: Specifying a custom executor backend by passing a class
     # is intended for expert use only. The API may change without
     # notice.
-    distributed_executor_backend: Optional[
-        Union[str, DistributedExecutorBackend, type[ExecutorBase]]
-    ] = ParallelConfig.distributed_executor_backend
+    distributed_executor_backend: (
+        str | DistributedExecutorBackend | type[ExecutorBase] | None
+    ) = ParallelConfig.distributed_executor_backend
     # number of P/D disaggregation (or other disaggregation) workers
     pipeline_parallel_size: int = ParallelConfig.pipeline_parallel_size
     tensor_parallel_size: int = ParallelConfig.tensor_parallel_size
     decode_context_parallel_size: int = ParallelConfig.decode_context_parallel_size
     data_parallel_size: int = ParallelConfig.data_parallel_size
-    data_parallel_rank: Optional[int] = None
-    data_parallel_start_rank: Optional[int] = None
-    data_parallel_size_local: Optional[int] = None
-    data_parallel_address: Optional[str] = None
-    data_parallel_rpc_port: Optional[int] = None
+    data_parallel_rank: int | None = None
+    data_parallel_start_rank: int | None = None
+    data_parallel_size_local: int | None = None
+    data_parallel_address: str | None = None
+    data_parallel_rpc_port: int | None = None
     data_parallel_hybrid_lb: bool = False
     data_parallel_backend: str = ParallelConfig.data_parallel_backend
     enable_expert_parallel: bool = ParallelConfig.enable_expert_parallel
@@ -385,11 +388,11 @@ class EngineArgs:
     eplb_window_size: int = EPLBConfig.window_size
     eplb_step_interval: int = EPLBConfig.step_interval
     eplb_log_balancedness: bool = EPLBConfig.log_balancedness
-    max_parallel_loading_workers: Optional[int] = (
+    max_parallel_loading_workers: int | None = (
         ParallelConfig.max_parallel_loading_workers
     )
-    block_size: Optional[BlockSize] = CacheConfig.block_size
-    enable_prefix_caching: Optional[bool] = CacheConfig.enable_prefix_caching
+    block_size: BlockSize | None = CacheConfig.block_size
+    enable_prefix_caching: bool | None = CacheConfig.enable_prefix_caching
     prefix_caching_hash_algo: PrefixCachingHashAlgo = (
         CacheConfig.prefix_caching_hash_algo
     )
@@ -398,62 +401,62 @@ class EngineArgs:
     swap_space: float = CacheConfig.swap_space
     cpu_offload_gb: float = CacheConfig.cpu_offload_gb
     gpu_memory_utilization: float = CacheConfig.gpu_memory_utilization
-    kv_cache_memory_bytes: Optional[int] = CacheConfig.kv_cache_memory_bytes
-    max_num_batched_tokens: Optional[int] = SchedulerConfig.max_num_batched_tokens
+    kv_cache_memory_bytes: int | None = CacheConfig.kv_cache_memory_bytes
+    max_num_batched_tokens: int | None = SchedulerConfig.max_num_batched_tokens
     max_num_partial_prefills: int = SchedulerConfig.max_num_partial_prefills
     max_long_partial_prefills: int = SchedulerConfig.max_long_partial_prefills
     long_prefill_token_threshold: int = SchedulerConfig.long_prefill_token_threshold
-    max_num_seqs: Optional[int] = SchedulerConfig.max_num_seqs
+    max_num_seqs: int | None = SchedulerConfig.max_num_seqs
     max_logprobs: int = ModelConfig.max_logprobs
     logprobs_mode: LogprobsMode = ModelConfig.logprobs_mode
     disable_log_stats: bool = False
-    revision: Optional[str] = ModelConfig.revision
-    code_revision: Optional[str] = ModelConfig.code_revision
+    revision: str | None = ModelConfig.revision
+    code_revision: str | None = ModelConfig.code_revision
     rope_scaling: dict[str, Any] = get_field(ModelConfig, "rope_scaling")
-    rope_theta: Optional[float] = ModelConfig.rope_theta
-    hf_token: Optional[Union[bool, str]] = ModelConfig.hf_token
+    rope_theta: float | None = ModelConfig.rope_theta
+    hf_token: bool | str | None = ModelConfig.hf_token
     hf_overrides: HfOverrides = get_field(ModelConfig, "hf_overrides")
-    tokenizer_revision: Optional[str] = ModelConfig.tokenizer_revision
-    quantization: Optional[QuantizationMethods] = ModelConfig.quantization
+    tokenizer_revision: str | None = ModelConfig.tokenizer_revision
+    quantization: QuantizationMethods | None = ModelConfig.quantization
     enforce_eager: bool = ModelConfig.enforce_eager
     disable_custom_all_reduce: bool = ParallelConfig.disable_custom_all_reduce
-    limit_mm_per_prompt: dict[str, Union[int, dict[str, int]]] = get_field(
+    limit_mm_per_prompt: dict[str, int | dict[str, int]] = get_field(
         MultiModalConfig, "limit_per_prompt"
     )
     interleave_mm_strings: bool = MultiModalConfig.interleave_mm_strings
     media_io_kwargs: dict[str, dict[str, Any]] = get_field(
         MultiModalConfig, "media_io_kwargs"
     )
-    mm_processor_kwargs: Optional[dict[str, Any]] = MultiModalConfig.mm_processor_kwargs
+    mm_processor_kwargs: dict[str, Any] | None = MultiModalConfig.mm_processor_kwargs
     disable_mm_preprocessor_cache: bool = False  # DEPRECATED
     mm_processor_cache_gb: float = MultiModalConfig.mm_processor_cache_gb
-    mm_processor_cache_type: Optional[MMCacheType] = (
+    mm_processor_cache_type: MMCacheType | None = (
         MultiModalConfig.mm_processor_cache_type
     )
     mm_shm_cache_max_object_size_mb: int = (
         MultiModalConfig.mm_shm_cache_max_object_size_mb
     )
     mm_encoder_tp_mode: MMEncoderTPMode = MultiModalConfig.mm_encoder_tp_mode
-    io_processor_plugin: Optional[str] = None
+    io_processor_plugin: str | None = None
     skip_mm_profiling: bool = MultiModalConfig.skip_mm_profiling
     video_pruning_rate: float = MultiModalConfig.video_pruning_rate

     # LoRA fields
     enable_lora: bool = False
     max_loras: int = LoRAConfig.max_loras
     max_lora_rank: int = LoRAConfig.max_lora_rank
-    default_mm_loras: Optional[dict[str, str]] = LoRAConfig.default_mm_loras
+    default_mm_loras: dict[str, str] | None = LoRAConfig.default_mm_loras
     fully_sharded_loras: bool = LoRAConfig.fully_sharded_loras
-    max_cpu_loras: Optional[int] = LoRAConfig.max_cpu_loras
-    lora_dtype: Optional[Union[str, torch.dtype]] = LoRAConfig.lora_dtype
+    max_cpu_loras: int | None = LoRAConfig.max_cpu_loras
+    lora_dtype: str | torch.dtype | None = LoRAConfig.lora_dtype
     lora_extra_vocab_size: int = LoRAConfig.lora_extra_vocab_size
     ray_workers_use_nsight: bool = ParallelConfig.ray_workers_use_nsight
-    num_gpu_blocks_override: Optional[int] = CacheConfig.num_gpu_blocks_override
+    num_gpu_blocks_override: int | None = CacheConfig.num_gpu_blocks_override
     num_lookahead_slots: int = SchedulerConfig.num_lookahead_slots
     model_loader_extra_config: dict = get_field(LoadConfig, "model_loader_extra_config")
-    ignore_patterns: Union[str, list[str]] = get_field(LoadConfig, "ignore_patterns")
-    enable_chunked_prefill: Optional[bool] = SchedulerConfig.enable_chunked_prefill
+    ignore_patterns: str | list[str] = get_field(LoadConfig, "ignore_patterns")
+    enable_chunked_prefill: bool | None = SchedulerConfig.enable_chunked_prefill
     disable_chunked_mm_input: bool = SchedulerConfig.disable_chunked_mm_input
     disable_hybrid_kv_cache_manager: bool = (
         SchedulerConfig.disable_hybrid_kv_cache_manager
     )
@@ -465,35 +468,35 @@ class EngineArgs:
     )
     reasoning_parser: str = StructuredOutputsConfig.reasoning_parser
     # Deprecated guided decoding fields
-    guided_decoding_backend: Optional[str] = None
-    guided_decoding_disable_fallback: Optional[bool] = None
-    guided_decoding_disable_any_whitespace: Optional[bool] = None
-    guided_decoding_disable_additional_properties: Optional[bool] = None
+    guided_decoding_backend: str | None = None
+    guided_decoding_disable_fallback: bool | None = None
+    guided_decoding_disable_any_whitespace: bool | None = None
+    guided_decoding_disable_additional_properties: bool | None = None

-    logits_processor_pattern: Optional[str] = ModelConfig.logits_processor_pattern
+    logits_processor_pattern: str | None = ModelConfig.logits_processor_pattern

-    speculative_config: Optional[dict[str, Any]] = None
+    speculative_config: dict[str, Any] | None = None

-    show_hidden_metrics_for_version: Optional[str] = (
+    show_hidden_metrics_for_version: str | None = (
         ObservabilityConfig.show_hidden_metrics_for_version
     )
-    otlp_traces_endpoint: Optional[str] = ObservabilityConfig.otlp_traces_endpoint
-    collect_detailed_traces: Optional[list[DetailedTraceModules]] = (
+    otlp_traces_endpoint: str | None = ObservabilityConfig.otlp_traces_endpoint
+    collect_detailed_traces: list[DetailedTraceModules] | None = (
         ObservabilityConfig.collect_detailed_traces
     )
     scheduling_policy: SchedulerPolicy = SchedulerConfig.policy
-    scheduler_cls: Union[str, type[object]] = SchedulerConfig.scheduler_cls
+    scheduler_cls: str | type[object] = SchedulerConfig.scheduler_cls
-    pooler_config: Optional[PoolerConfig] = ModelConfig.pooler_config
-    override_pooler_config: Optional[Union[dict, PoolerConfig]] = (
+    pooler_config: PoolerConfig | None = ModelConfig.pooler_config
+    override_pooler_config: dict | PoolerConfig | None = (
         ModelConfig.override_pooler_config
     )
     compilation_config: CompilationConfig = get_field(VllmConfig, "compilation_config")

     worker_cls: str = ParallelConfig.worker_cls
     worker_extension_cls: str = ParallelConfig.worker_extension_cls

-    kv_transfer_config: Optional[KVTransferConfig] = None
-    kv_events_config: Optional[KVEventsConfig] = None
+    kv_transfer_config: KVTransferConfig | None = None
+    kv_events_config: KVEventsConfig | None = None

     generation_config: str = ModelConfig.generation_config
     enable_sleep_mode: bool = ModelConfig.enable_sleep_mode
@@ -515,7 +518,7 @@ class EngineArgs:
     # DEPRECATED
     enable_multimodal_encoder_data_parallel: bool = False

-    logits_processors: Optional[list[Union[str, type[LogitsProcessor]]]] = (
+    logits_processors: list[str | type[LogitsProcessor]] | None = (
         ModelConfig.logits_processors
     )
     """Custom logitproc types"""
@@ -1187,7 +1190,7 @@ class EngineArgs:
         target_parallel_config: ParallelConfig,
         enable_chunked_prefill: bool,
         disable_log_stats: bool,
-    ) -> Optional["SpeculativeConfig"]:
+    ) -> SpeculativeConfig | None:
        """Initializes and returns a SpeculativeConfig object based on
        `speculative_config`.

@@ -1214,7 +1217,7 @@ class EngineArgs:

     def create_engine_config(
         self,
-        usage_context: Optional[UsageContext] = None,
+        usage_context: UsageContext | None = None,
         headless: bool = False,
     ) -> VllmConfig:
         """
@@ -1282,7 +1285,7 @@ class EngineArgs:
             self.enable_chunked_prefill = False
         assert self.enable_chunked_prefill is not None

-        sliding_window: Optional[int] = None
+        sliding_window: int | None = None
        if not is_interleaved(model_config.hf_text_config):
            # Only set CacheConfig.sliding_window if the model is all sliding
            # window. Otherwise CacheConfig.sliding_window will override the