Update Optional[x] to x | None and Union[x, y] to x | y (#26633)
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
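
In short, the commit migrates annotations from the `typing` module's `Optional`/`Union` generics to PEP 604 union syntax, which Python 3.10+ supports natively. A minimal before/after sketch of the pattern (illustrative names, not taken from the diff):

    # Before: typing-module generics.
    from typing import Optional, Union

    def lookup(key: str) -> Optional[str]: ...      # may return None
    def coerce(value: Union[int, str]) -> str: ...  # accepts int or str

    # After: PEP 604 unions; the typing imports become unnecessary.
    def lookup(key: str) -> str | None: ...
    def coerce(value: int | str) -> str: ...

The hunks below apply this rewrite across the vllm/platforms package, dropping `Optional` and `Union` from each file's `typing` import as they become unused.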
--- a/vllm/platforms/__init__.py
+++ b/vllm/platforms/__init__.py
@@ -3,7 +3,7 @@
 import logging
 import traceback
 from itertools import chain
-from typing import TYPE_CHECKING, Optional
+from typing import TYPE_CHECKING
 
 from vllm import envs
 from vllm.plugins import load_plugins_by_group
@@ -31,7 +31,7 @@ def vllm_version_matches_substr(substr: str) -> bool:
     return substr in vllm_version
 
 
-def tpu_platform_plugin() -> Optional[str]:
+def tpu_platform_plugin() -> str | None:
     logger.debug("Checking if TPU platform is available.")
 
     # Check for Pathways TPU proxy
@@ -55,7 +55,7 @@ def tpu_platform_plugin() -> Optional[str]:
     return None
 
 
-def cuda_platform_plugin() -> Optional[str]:
+def cuda_platform_plugin() -> str | None:
     is_cuda = False
     logger.debug("Checking if CUDA platform is available.")
     try:
@@ -106,7 +106,7 @@ def cuda_platform_plugin() -> Optional[str]:
     return "vllm.platforms.cuda.CudaPlatform" if is_cuda else None
 
 
-def rocm_platform_plugin() -> Optional[str]:
+def rocm_platform_plugin() -> str | None:
     is_rocm = False
     logger.debug("Checking if ROCm platform is available.")
     try:
@@ -127,7 +127,7 @@ def rocm_platform_plugin() -> Optional[str]:
     return "vllm.platforms.rocm.RocmPlatform" if is_rocm else None
 
 
-def xpu_platform_plugin() -> Optional[str]:
+def xpu_platform_plugin() -> str | None:
     is_xpu = False
     logger.debug("Checking if XPU platform is available.")
     try:
@@ -154,7 +154,7 @@ def xpu_platform_plugin() -> Optional[str]:
     return "vllm.platforms.xpu.XPUPlatform" if is_xpu else None
 
 
-def cpu_platform_plugin() -> Optional[str]:
+def cpu_platform_plugin() -> str | None:
     is_cpu = False
     logger.debug("Checking if CPU platform is available.")
     try:
--- a/vllm/platforms/cpu.py
+++ b/vllm/platforms/cpu.py
@@ -4,13 +4,13 @@
 import json
 import os
 import platform
 import re
 import subprocess
 import sys
 from dataclasses import dataclass
 from importlib.util import find_spec
-from typing import TYPE_CHECKING, Optional
+from typing import TYPE_CHECKING
 
 import regex as re
 import torch
 
 from vllm.logger import init_logger
@@ -128,7 +128,7 @@ class CpuPlatform(Platform):
         selected_backend: "_Backend",
         head_size: int,
         dtype: torch.dtype,
-        kv_cache_dtype: Optional[str],
+        kv_cache_dtype: str | None,
         block_size: int,
         use_v1: bool,
         use_mla: bool,
--- a/vllm/platforms/cuda.py
+++ b/vllm/platforms/cuda.py
@@ -5,9 +5,10 @@ pynvml. However, it should not initialize cuda context.
 """
 
 import os
+from collections.abc import Callable
 from datetime import timedelta
 from functools import cache, wraps
-from typing import TYPE_CHECKING, Callable, Optional, TypeVar, Union
+from typing import TYPE_CHECKING, TypeVar
 
 import torch
 from torch.distributed import PrefixStore, ProcessGroup
@@ -85,7 +86,7 @@ class CudaPlatformBase(Platform):
         _ = torch.zeros(1, device=device)
 
     @classmethod
-    def get_device_capability(cls, device_id: int = 0) -> Optional[DeviceCapability]:
+    def get_device_capability(cls, device_id: int = 0) -> DeviceCapability | None:
         raise NotImplementedError
 
     @classmethod
@@ -210,7 +211,7 @@ class CudaPlatformBase(Platform):
 
     @classmethod
     def get_current_memory_usage(
-        cls, device: Optional[torch.types.Device] = None
+        cls, device: torch.types.Device | None = None
     ) -> float:
         torch.cuda.empty_cache()
         torch.cuda.reset_peak_memory_stats(device)
@@ -594,7 +595,7 @@ class NvmlCudaPlatform(CudaPlatformBase):
     @classmethod
     @cache
     @with_nvml_context
-    def get_device_capability(cls, device_id: int = 0) -> Optional[DeviceCapability]:
+    def get_device_capability(cls, device_id: int = 0) -> DeviceCapability | None:
         try:
             physical_device_id = cls.device_id_to_physical_device_id(device_id)
             handle = pynvml.nvmlDeviceGetHandleByIndex(physical_device_id)
@@ -607,7 +608,7 @@ class NvmlCudaPlatform(CudaPlatformBase):
     @with_nvml_context
     def has_device_capability(
         cls,
-        capability: Union[tuple[int, int], int],
+        capability: tuple[int, int] | int,
         device_id: int = 0,
     ) -> bool:
         try:
--- a/vllm/platforms/interface.py
+++ b/vllm/platforms/interface.py
@@ -8,7 +8,7 @@ import random
 import sys
 from datetime import timedelta
 from platform import uname
-from typing import TYPE_CHECKING, Any, NamedTuple, Optional, Union
+from typing import TYPE_CHECKING, Any, NamedTuple
 
 import numpy as np
 import torch
@@ -20,18 +20,16 @@ from vllm.logger import init_logger
 if TYPE_CHECKING:
     from vllm.attention.backends.registry import _Backend
     from vllm.config import ModelConfig, VllmConfig
     from vllm.lora.request import LoRARequest
     from vllm.pooling_params import PoolingParams
     from vllm.sampling_params import SamplingParams
     from vllm.utils import FlexibleArgumentParser
 else:
-    _Backend = None
-    ModelConfig = None
-    VllmConfig = None
-    LoRARequest = None
-    PoolingParams = None
-    SamplingParams = None
-    FlexibleArgumentParser = None
+    _Backend = object
+    ModelConfig = object
+    VllmConfig = object
+    PoolingParams = object
+    SamplingParams = object
+    FlexibleArgumentParser = object
 
 logger = init_logger(__name__)
@@ -113,7 +111,7 @@ class Platform:
 
     additional_env_vars: list[str] = []
 
-    _global_graph_pool: Optional[Any] = None
+    _global_graph_pool: Any | None = None
 
     @property
     def supported_dtypes(self) -> list[torch.dtype]:
@@ -180,7 +178,7 @@ class Platform:
         import vllm._moe_C  # noqa: F401
 
     @classmethod
-    def get_vit_attn_backend(cls, head_size: int, dtype: torch.dtype) -> "_Backend":
+    def get_vit_attn_backend(cls, head_size: int, dtype: torch.dtype) -> _Backend:
         from vllm.attention.backends.registry import _Backend
 
         return _Backend.TORCH_SDPA
@@ -188,10 +186,10 @@ class Platform:
     @classmethod
     def get_attn_backend_cls(
         cls,
-        selected_backend: "_Backend",
+        selected_backend: _Backend,
         head_size: int,
         dtype: torch.dtype,
-        kv_cache_dtype: Optional[str],
+        kv_cache_dtype: str | None,
         block_size: int,
         use_v1: bool,
         use_mla: bool,
@@ -205,14 +203,14 @@ class Platform:
     def get_device_capability(
         cls,
         device_id: int = 0,
-    ) -> Optional[DeviceCapability]:
+    ) -> DeviceCapability | None:
         """Stateless version of [torch.cuda.get_device_capability][]."""
         return None
 
     @classmethod
     def has_device_capability(
         cls,
-        capability: Union[tuple[int, int], int],
+        capability: tuple[int, int] | int,
         device_id: int = 0,
     ) -> bool:
         """
@@ -236,7 +234,7 @@ class Platform:
     @classmethod
     def is_device_capability(
         cls,
-        capability: Union[tuple[int, int], int],
+        capability: tuple[int, int] | int,
         device_id: int = 0,
     ) -> bool:
         """
@@ -283,7 +281,7 @@ class Platform:
         return torch.inference_mode(mode=True)
 
     @classmethod
-    def seed_everything(cls, seed: Optional[int] = None) -> None:
+    def seed_everything(cls, seed: int | None = None) -> None:
         """
         Set the seed of each random module.
         `torch.manual_seed` will set seed on all devices.
@@ -304,7 +302,7 @@ class Platform:
 
     @classmethod
     def pre_register_and_update(
-        cls, parser: Optional[FlexibleArgumentParser] = None
+        cls, parser: FlexibleArgumentParser | None = None
     ) -> None:
         """
         Do some pre-registration or update action for the current platform.
@@ -389,7 +387,7 @@ class Platform:
 
     @classmethod
     def get_current_memory_usage(
-        cls, device: Optional[torch.types.Device] = None
+        cls, device: torch.types.Device | None = None
     ) -> float:
         """
         Return the memory usage in bytes.
@@ -501,7 +499,7 @@ class Platform:
     def validate_request(
         cls,
         prompt: PromptType,
-        params: Union[SamplingParams, PoolingParams],
+        params: SamplingParams | PoolingParams,
         processed_inputs: ProcessorInputs,
     ) -> None:
         """Raises if this request is unsupported on this platform"""
@@ -557,7 +555,7 @@ class Platform:
 
     @classmethod
     def is_kv_cache_dtype_supported(
-        cls, kv_cache_dtype: str, model_config: "ModelConfig"
+        cls, kv_cache_dtype: str, model_config: ModelConfig
     ) -> bool:
         """
         Returns if the kv_cache_dtype is supported by the current platform.
@@ -617,7 +615,7 @@ class Platform:
         return {}
 
     @classmethod
-    def get_nixl_memory_type(cls) -> Optional[str]:
+    def get_nixl_memory_type(cls) -> str | None:
         """
         Returns the nixl memory type for the current platform.
         """
--- a/vllm/platforms/rocm.py
+++ b/vllm/platforms/rocm.py
@@ -4,7 +4,7 @@
 import os
 from datetime import timedelta
 from functools import cache, lru_cache, wraps
-from typing import TYPE_CHECKING, Optional
+from typing import TYPE_CHECKING
 
 import torch
 from torch.distributed import PrefixStore, ProcessGroup
@@ -140,8 +140,8 @@ def use_rocm_custom_paged_attention(
     max_seq_len: int,
     sliding_window: int,
     kv_cache_dtype: str,
-    alibi_slopes: Optional[torch.Tensor] = None,
-    sinks: Optional[torch.Tensor] = None,
+    alibi_slopes: torch.Tensor | None = None,
+    sinks: torch.Tensor | None = None,
 ) -> bool:
     GPU_ARCH = torch.cuda.get_device_properties("cuda").gcnArchName
     ON_GFX9 = any(arch in GPU_ARCH for arch in ["gfx90a", "gfx942", "gfx950"])
@@ -320,7 +320,7 @@ class RocmPlatform(Platform):
 
     @classmethod
     @lru_cache(maxsize=8)
-    def get_device_capability(cls, device_id: int = 0) -> Optional[DeviceCapability]:
+    def get_device_capability(cls, device_id: int = 0) -> DeviceCapability | None:
         major, minor = torch.cuda.get_device_capability(device_id)
         return DeviceCapability(major=major, minor=minor)
 
@@ -420,7 +420,7 @@ class RocmPlatform(Platform):
 
     @classmethod
     def get_current_memory_usage(
-        cls, device: Optional[torch.types.Device] = None
+        cls, device: torch.types.Device | None = None
     ) -> float:
         torch.cuda.reset_peak_memory_stats(device)
         return torch.cuda.mem_get_info(device)[1] - torch.cuda.mem_get_info(device)[0]
--- a/vllm/platforms/tpu.py
+++ b/vllm/platforms/tpu.py
@@ -2,7 +2,7 @@
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
 import contextlib
-from typing import TYPE_CHECKING, Optional, Union, cast
+from typing import TYPE_CHECKING, cast
 
 import torch
 from tpu_info import device
@@ -57,7 +57,7 @@ class TpuPlatform(Platform):
         selected_backend: "_Backend",
         head_size: int,
         dtype: torch.dtype,
-        kv_cache_dtype: Optional[str],
+        kv_cache_dtype: str | None,
         block_size: int,
         use_v1: bool,
         use_mla: bool,
@@ -211,7 +211,7 @@ class TpuPlatform(Platform):
     def validate_request(
         cls,
         prompt: PromptType,
-        params: Union[SamplingParams, PoolingParams],
+        params: SamplingParams | PoolingParams,
         processed_inputs: ProcessorInputs,
     ) -> None:
         """Raises if this request is unsupported on this platform"""
--- a/vllm/platforms/xpu.py
+++ b/vllm/platforms/xpu.py
@@ -3,7 +3,7 @@
 
 import contextlib
 import os
-from typing import TYPE_CHECKING, Optional
+from typing import TYPE_CHECKING
 
 import torch
 
@@ -47,7 +47,7 @@ class XPUPlatform(Platform):
         selected_backend: "_Backend",
         head_size: int,
         dtype: torch.dtype,
-        kv_cache_dtype: Optional[str],
+        kv_cache_dtype: str | None,
         block_size: int,
         use_v1: bool,
         use_mla: bool,
@@ -113,7 +113,7 @@ class XPUPlatform(Platform):
     def get_device_capability(
         cls,
         device_id: int = 0,
-    ) -> Optional[DeviceCapability]:
+    ) -> DeviceCapability | None:
         # capacity format differs from cuda's and will cause unexpected
         # failure, so use None directly
         return None
@@ -213,7 +213,7 @@ class XPUPlatform(Platform):
 
     @classmethod
     def get_current_memory_usage(
-        cls, device: Optional[torch.types.Device] = None
+        cls, device: torch.types.Device | None = None
     ) -> float:
         torch.xpu.reset_peak_memory_stats(device)
         return torch.xpu.max_memory_allocated(device)
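
One hunk in vllm/platforms/interface.py goes beyond pure syntax: the `TYPE_CHECKING` fallbacks change from `None` to `object`. A plausible reason (my reading, not stated in the commit) is that the now-unquoted annotations such as `SamplingParams | PoolingParams` are evaluated at import time, and `None | None` is not a valid union, whereas any class, including `object`, supports the `|` operator. A minimal sketch of the failure the `object` placeholder avoids, reusing one import from the diff:

    from typing import TYPE_CHECKING

    if TYPE_CHECKING:
        from vllm.sampling_params import SamplingParams
    else:
        SamplingParams = None  # old-style placeholder

    # With the placeholder bound to None, evaluating the annotation below
    # raises: TypeError: unsupported operand type(s) for |: 'NoneType' and 'NoneType'
    def validate(params: SamplingParams | None) -> None: ...

    # Rebinding the placeholder to `object` keeps the annotation evaluable,
    # since `object | None` is a well-formed runtime union.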