Update Optional[x] -> x | None and Union[x, y] to x | y (#26633)
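
The two spellings are equivalent at runtime on Python 3.10+, so the rewrite is purely syntactic. A minimal before/after sketch of the pattern this commit applies tree-wide (function names here are illustrative, not taken from the diff):

# Before: typing-module spellings
from typing import Optional, Union

def parse_old(text: str, limit: Optional[int] = None) -> Union[int, str]:
    return int(text) if text.isdigit() else text

# After: PEP 604 operator syntax
def parse_new(text: str, limit: int | None = None) -> int | str:
    return int(text) if text.isdigit() else text

# Both forms compare equal at runtime, so the change is behavior-preserving:
assert Optional[int] == (int | None)
assert Union[int, str] == (int | str)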

Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>

Author: Harry Mellor
Date: 2025-10-12 17:51:31 +01:00
Committed by: GitHub
Parent: 9bb38130cb
Commit: 8fcaaf6a16

944 changed files with 9490 additions and 10121 deletions

File: vllm/platforms/__init__.py

@@ -3,7 +3,7 @@
 import logging
 import traceback
 from itertools import chain
-from typing import TYPE_CHECKING, Optional
+from typing import TYPE_CHECKING
 from vllm import envs
 from vllm.plugins import load_plugins_by_group
@@ -31,7 +31,7 @@ def vllm_version_matches_substr(substr: str) -> bool:
     return substr in vllm_version
-def tpu_platform_plugin() -> Optional[str]:
+def tpu_platform_plugin() -> str | None:
     logger.debug("Checking if TPU platform is available.")
     # Check for Pathways TPU proxy
@@ -55,7 +55,7 @@ def tpu_platform_plugin() -> Optional[str]:
     return None
-def cuda_platform_plugin() -> Optional[str]:
+def cuda_platform_plugin() -> str | None:
     is_cuda = False
     logger.debug("Checking if CUDA platform is available.")
     try:
@@ -106,7 +106,7 @@ def cuda_platform_plugin() -> Optional[str]:
     return "vllm.platforms.cuda.CudaPlatform" if is_cuda else None
-def rocm_platform_plugin() -> Optional[str]:
+def rocm_platform_plugin() -> str | None:
    is_rocm = False
    logger.debug("Checking if ROCm platform is available.")
    try:
@@ -127,7 +127,7 @@ def rocm_platform_plugin() -> Optional[str]:
     return "vllm.platforms.rocm.RocmPlatform" if is_rocm else None
-def xpu_platform_plugin() -> Optional[str]:
+def xpu_platform_plugin() -> str | None:
     is_xpu = False
     logger.debug("Checking if XPU platform is available.")
     try:
@@ -154,7 +154,7 @@ def xpu_platform_plugin() -> Optional[str]:
     return "vllm.platforms.xpu.XPUPlatform" if is_xpu else None
-def cpu_platform_plugin() -> Optional[str]:
+def cpu_platform_plugin() -> str | None:
     is_cpu = False
     logger.debug("Checking if CPU platform is available.")
     try:
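
All five detectors above share the same contract: return the platform's fully qualified class path as a string when the hardware is present, otherwise None, which is why each signature becomes `-> str | None`. A hedged sketch of how such probes compose (the resolver below is illustrative, not vLLM's actual implementation):

from collections.abc import Callable

def resolve_platform(probes: list[Callable[[], str | None]]) -> str | None:
    # Each probe returns an entrypoint like "vllm.platforms.cuda.CudaPlatform",
    # or None when its hardware is absent; the first hit wins.
    for probe in probes:
        entrypoint = probe()
        if entrypoint is not None:
            return entrypoint
    return None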

File: vllm/platforms/cpu.py

@@ -4,13 +4,13 @@
 import json
 import os
 import platform
 import re
 import subprocess
 import sys
 from dataclasses import dataclass
 from importlib.util import find_spec
-from typing import TYPE_CHECKING, Optional
+from typing import TYPE_CHECKING
 import regex as re
 import torch
 from vllm.logger import init_logger
@@ -128,7 +128,7 @@ class CpuPlatform(Platform):
         selected_backend: "_Backend",
         head_size: int,
         dtype: torch.dtype,
-        kv_cache_dtype: Optional[str],
+        kv_cache_dtype: str | None,
         block_size: int,
         use_v1: bool,
         use_mla: bool,

File: vllm/platforms/cuda.py

@@ -5,9 +5,10 @@ pynvml. However, it should not initialize cuda context.
 """
 import os
+from collections.abc import Callable
 from datetime import timedelta
 from functools import cache, wraps
-from typing import TYPE_CHECKING, Callable, Optional, TypeVar, Union
+from typing import TYPE_CHECKING, TypeVar
 import torch
 from torch.distributed import PrefixStore, ProcessGroup
@@ -85,7 +86,7 @@ class CudaPlatformBase(Platform):
         _ = torch.zeros(1, device=device)
     @classmethod
-    def get_device_capability(cls, device_id: int = 0) -> Optional[DeviceCapability]:
+    def get_device_capability(cls, device_id: int = 0) -> DeviceCapability | None:
         raise NotImplementedError
     @classmethod
@@ -210,7 +211,7 @@ class CudaPlatformBase(Platform):
     @classmethod
     def get_current_memory_usage(
-        cls, device: Optional[torch.types.Device] = None
+        cls, device: torch.types.Device | None = None
     ) -> float:
         torch.cuda.empty_cache()
         torch.cuda.reset_peak_memory_stats(device)
@@ -594,7 +595,7 @@ class NvmlCudaPlatform(CudaPlatformBase):
     @classmethod
     @cache
     @with_nvml_context
-    def get_device_capability(cls, device_id: int = 0) -> Optional[DeviceCapability]:
+    def get_device_capability(cls, device_id: int = 0) -> DeviceCapability | None:
         try:
             physical_device_id = cls.device_id_to_physical_device_id(device_id)
             handle = pynvml.nvmlDeviceGetHandleByIndex(physical_device_id)
@@ -607,7 +608,7 @@ class NvmlCudaPlatform(CudaPlatformBase):
     @with_nvml_context
     def has_device_capability(
         cls,
-        capability: Union[tuple[int, int], int],
+        capability: tuple[int, int] | int,
         device_id: int = 0,
     ) -> bool:
         try:
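
has_device_capability accepts either a (major, minor) pair or the packed single-integer form, hence the `tuple[int, int] | int` union. A sketch of the normalization such a parameter implies, assuming vLLM's DeviceCapability packing of major * 10 + minor, e.g. (8, 6) -> 86 (the helper name is hypothetical):

def _capability_to_int(capability: tuple[int, int] | int) -> int:
    # Hypothetical helper: pack (major, minor) into the single-integer form.
    if isinstance(capability, tuple):
        major, minor = capability
        assert 0 <= minor < 10, "packing assumes a single-digit minor version"
        return major * 10 + minor
    return capability  # already packed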

File: vllm/platforms/interface.py

@@ -8,7 +8,7 @@ import random
 import sys
 from datetime import timedelta
 from platform import uname
-from typing import TYPE_CHECKING, Any, NamedTuple, Optional, Union
+from typing import TYPE_CHECKING, Any, NamedTuple
 import numpy as np
 import torch
@@ -20,18 +20,16 @@ from vllm.logger import init_logger
 if TYPE_CHECKING:
     from vllm.attention.backends.registry import _Backend
     from vllm.config import ModelConfig, VllmConfig
-    from vllm.lora.request import LoRARequest
     from vllm.pooling_params import PoolingParams
     from vllm.sampling_params import SamplingParams
     from vllm.utils import FlexibleArgumentParser
 else:
-    _Backend = None
-    ModelConfig = None
-    VllmConfig = None
-    LoRARequest = None
-    PoolingParams = None
-    SamplingParams = None
-    FlexibleArgumentParser = None
+    _Backend = object
+    ModelConfig = object
+    VllmConfig = object
+    PoolingParams = object
+    SamplingParams = object
+    FlexibleArgumentParser = object
 logger = init_logger(__name__)
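
Note the runtime fallbacks change from None to object. This is what allows the quoted annotations below ("_Backend", "ModelConfig", ...) to drop their quotes: PEP 604 unions are evaluated eagerly at definition time, and `None | None` raises TypeError while `object | None` is a valid union. A minimal reproduction of the failure mode the old placeholders would cause (`some_module` is hypothetical):

from typing import TYPE_CHECKING

if TYPE_CHECKING:
    from some_module import FlexibleArgumentParser  # seen only by type checkers
else:
    FlexibleArgumentParser = object  # runtime placeholder; `= None` would break below

# The annotation is evaluated when the def statement runs; with the old
# `FlexibleArgumentParser = None` placeholder this raises
# "TypeError: unsupported operand type(s) for |: 'NoneType' and 'NoneType'".
def pre_register_and_update(parser: FlexibleArgumentParser | None = None) -> None:
    ...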
@@ -113,7 +111,7 @@ class Platform:
     additional_env_vars: list[str] = []
-    _global_graph_pool: Optional[Any] = None
+    _global_graph_pool: Any | None = None
     @property
     def supported_dtypes(self) -> list[torch.dtype]:
@@ -180,7 +178,7 @@ class Platform:
         import vllm._moe_C  # noqa: F401
     @classmethod
-    def get_vit_attn_backend(cls, head_size: int, dtype: torch.dtype) -> "_Backend":
+    def get_vit_attn_backend(cls, head_size: int, dtype: torch.dtype) -> _Backend:
         from vllm.attention.backends.registry import _Backend
         return _Backend.TORCH_SDPA
@@ -188,10 +186,10 @@
     @classmethod
     def get_attn_backend_cls(
         cls,
-        selected_backend: "_Backend",
+        selected_backend: _Backend,
         head_size: int,
         dtype: torch.dtype,
-        kv_cache_dtype: Optional[str],
+        kv_cache_dtype: str | None,
         block_size: int,
         use_v1: bool,
         use_mla: bool,
@@ -205,14 +203,14 @@
     def get_device_capability(
         cls,
         device_id: int = 0,
-    ) -> Optional[DeviceCapability]:
+    ) -> DeviceCapability | None:
         """Stateless version of [torch.cuda.get_device_capability][]."""
         return None
     @classmethod
     def has_device_capability(
         cls,
-        capability: Union[tuple[int, int], int],
+        capability: tuple[int, int] | int,
         device_id: int = 0,
     ) -> bool:
         """
@@ -236,7 +234,7 @@ class Platform:
     @classmethod
     def is_device_capability(
         cls,
-        capability: Union[tuple[int, int], int],
+        capability: tuple[int, int] | int,
         device_id: int = 0,
     ) -> bool:
         """
@@ -283,7 +281,7 @@ class Platform:
         return torch.inference_mode(mode=True)
     @classmethod
-    def seed_everything(cls, seed: Optional[int] = None) -> None:
+    def seed_everything(cls, seed: int | None = None) -> None:
         """
         Set the seed of each random module.
         `torch.manual_seed` will set seed on all devices.
@@ -304,7 +302,7 @@ class Platform:
     @classmethod
     def pre_register_and_update(
-        cls, parser: Optional[FlexibleArgumentParser] = None
+        cls, parser: FlexibleArgumentParser | None = None
     ) -> None:
         """
         Do some pre-registration or update action for the current platform.
@@ -389,7 +387,7 @@ class Platform:
     @classmethod
     def get_current_memory_usage(
-        cls, device: Optional[torch.types.Device] = None
+        cls, device: torch.types.Device | None = None
     ) -> float:
         """
         Return the memory usage in bytes.
@@ -501,7 +499,7 @@ class Platform:
     def validate_request(
         cls,
         prompt: PromptType,
-        params: Union[SamplingParams, PoolingParams],
+        params: SamplingParams | PoolingParams,
         processed_inputs: ProcessorInputs,
     ) -> None:
         """Raises if this request is unsupported on this platform"""
@@ -557,7 +555,7 @@ class Platform:
     @classmethod
     def is_kv_cache_dtype_supported(
-        cls, kv_cache_dtype: str, model_config: "ModelConfig"
+        cls, kv_cache_dtype: str, model_config: ModelConfig
     ) -> bool:
         """
         Returns if the kv_cache_dtype is supported by the current platform.
@@ -617,7 +615,7 @@ class Platform:
         return {}
     @classmethod
-    def get_nixl_memory_type(cls) -> Optional[str]:
+    def get_nixl_memory_type(cls) -> str | None:
         """
         Returns the nixl memory type for the current platform.
         """

File: vllm/platforms/rocm.py

@@ -4,7 +4,7 @@
 import os
 from datetime import timedelta
 from functools import cache, lru_cache, wraps
-from typing import TYPE_CHECKING, Optional
+from typing import TYPE_CHECKING
 import torch
 from torch.distributed import PrefixStore, ProcessGroup
@@ -140,8 +140,8 @@ def use_rocm_custom_paged_attention(
     max_seq_len: int,
     sliding_window: int,
     kv_cache_dtype: str,
-    alibi_slopes: Optional[torch.Tensor] = None,
-    sinks: Optional[torch.Tensor] = None,
+    alibi_slopes: torch.Tensor | None = None,
+    sinks: torch.Tensor | None = None,
 ) -> bool:
     GPU_ARCH = torch.cuda.get_device_properties("cuda").gcnArchName
     ON_GFX9 = any(arch in GPU_ARCH for arch in ["gfx90a", "gfx942", "gfx950"])
@@ -320,7 +320,7 @@ class RocmPlatform(Platform):
     @classmethod
     @lru_cache(maxsize=8)
-    def get_device_capability(cls, device_id: int = 0) -> Optional[DeviceCapability]:
+    def get_device_capability(cls, device_id: int = 0) -> DeviceCapability | None:
         major, minor = torch.cuda.get_device_capability(device_id)
         return DeviceCapability(major=major, minor=minor)
@@ -420,7 +420,7 @@ class RocmPlatform(Platform):
     @classmethod
     def get_current_memory_usage(
-        cls, device: Optional[torch.types.Device] = None
+        cls, device: torch.types.Device | None = None
     ) -> float:
         torch.cuda.reset_peak_memory_stats(device)
         return torch.cuda.mem_get_info(device)[1] - torch.cuda.mem_get_info(device)[0]
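
For readers skimming the hunk above: torch.cuda.mem_get_info returns a (free_bytes, total_bytes) pair for the device, so the return expression computes used memory as total minus free. An equivalent unpacked sketch using the new annotation style:

import torch

def used_gpu_bytes(device: torch.types.Device | None = None) -> int:
    # mem_get_info returns (free, total) in bytes for the given device.
    free, total = torch.cuda.mem_get_info(device)
    return total - free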

File: vllm/platforms/tpu.py

@@ -2,7 +2,7 @@
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 import contextlib
-from typing import TYPE_CHECKING, Optional, Union, cast
+from typing import TYPE_CHECKING, cast
 import torch
 from tpu_info import device
@@ -57,7 +57,7 @@ class TpuPlatform(Platform):
         selected_backend: "_Backend",
         head_size: int,
         dtype: torch.dtype,
-        kv_cache_dtype: Optional[str],
+        kv_cache_dtype: str | None,
         block_size: int,
         use_v1: bool,
         use_mla: bool,
@@ -211,7 +211,7 @@ class TpuPlatform(Platform):
     def validate_request(
         cls,
         prompt: PromptType,
-        params: Union[SamplingParams, PoolingParams],
+        params: SamplingParams | PoolingParams,
         processed_inputs: ProcessorInputs,
     ) -> None:
         """Raises if this request is unsupported on this platform"""

File: vllm/platforms/xpu.py

@@ -3,7 +3,7 @@
 import contextlib
 import os
-from typing import TYPE_CHECKING, Optional
+from typing import TYPE_CHECKING
 import torch
@@ -47,7 +47,7 @@ class XPUPlatform(Platform):
         selected_backend: "_Backend",
         head_size: int,
         dtype: torch.dtype,
-        kv_cache_dtype: Optional[str],
+        kv_cache_dtype: str | None,
         block_size: int,
         use_v1: bool,
         use_mla: bool,
@@ -113,7 +113,7 @@ class XPUPlatform(Platform):
     def get_device_capability(
         cls,
         device_id: int = 0,
-    ) -> Optional[DeviceCapability]:
+    ) -> DeviceCapability | None:
         # capacity format differs from cuda's and will cause unexpected
         # failure, so use None directly
         return None
@@ -213,7 +213,7 @@ class XPUPlatform(Platform):
     @classmethod
     def get_current_memory_usage(
-        cls, device: Optional[torch.types.Device] = None
+        cls, device: torch.types.Device | None = None
     ) -> float:
         torch.xpu.reset_peak_memory_stats(device)
         return torch.xpu.max_memory_allocated(device)