Update Optional[x] -> x | None and Union[x, y] to x | y (#26633)

Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
Author: Harry Mellor
Date: 2025-10-12 17:51:31 +01:00
Committed by: GitHub
Parent: 9bb38130cb
Commit: 8fcaaf6a16

944 changed files with 9490 additions and 10121 deletions
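
The whole change is a mechanical rewrite of type annotations to the PEP 604 union syntax: `Optional[x]` becomes `x | None` and `Union[x, y]` becomes `x | y`. As a minimal sketch of the pattern (hypothetical `lookup` function, not code from this diff; assumes Python 3.10+, where `|` unions are native):

```python
# Before: both spellings require imports from `typing`.
# from typing import Optional, Union
# def lookup(key: str, default: Optional[int] = None) -> Union[int, str]: ...

# After: PEP 604 unions are plain type expressions with no extra imports.
def lookup(key: str, default: int | None = None) -> int | str:
    """Hypothetical example: fall back to the key when no default is set."""
    return default if default is not None else key
```

The two spellings are equivalent to type checkers; rewrites of this scale are commonly automated (for example by Ruff's pyupgrade-derived rules), though the commit message does not say how this one was produced.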

@@ -7,12 +7,12 @@ import signal
 import threading
 import time
 from collections import deque
-from collections.abc import Generator
+from collections.abc import Callable, Generator
 from concurrent.futures import Future
 from contextlib import ExitStack, contextmanager
 from inspect import isclass, signature
 from logging import DEBUG
-from typing import Any, Callable, Optional, TypeVar, Union
+from typing import Any, TypeVar
 
 import msgspec
 import zmq
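
Besides dropping `Optional` and `Union`, the import hunk moves `Callable` from `typing` to `collections.abc`: since PEP 585 (Python 3.9) the `typing` aliases for container and callable types are deprecated, and the `collections.abc` versions are subscriptable directly. A small sketch of the replacement (hypothetical names, assuming Python 3.10+):

```python
from collections.abc import Callable

# `collections.abc.Callable` accepts the same subscript syntax that
# `typing.Callable` did: [argument-types], return-type.
Handler = Callable[[str], None]  # hypothetical alias

def register(handler: Handler | None = None) -> None:
    # A bare `Callable` (as in the next hunk) matches any call signature;
    # subscripting pins down the arguments and return type.
    if handler is not None:
        handler("ready")
```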
@@ -83,7 +83,7 @@ class EngineCore:
         vllm_config: VllmConfig,
         executor_class: type[Executor],
         log_stats: bool,
-        executor_fail_callback: Optional[Callable] = None,
+        executor_fail_callback: Callable | None = None,
     ):
         # plugins need to be loaded at the engine/scheduler level too
         from vllm.plugins import load_general_plugins
@@ -171,14 +171,14 @@ class EngineCore:
         # schedule and execute batches, and is required by pipeline parallelism
         # to eliminate pipeline bubbles.
         self.batch_queue_size = self.model_executor.max_concurrent_batches
-        self.batch_queue: Optional[
-            deque[tuple[Future[ModelRunnerOutput], SchedulerOutput]]
-        ] = None
+        self.batch_queue: (
+            deque[tuple[Future[ModelRunnerOutput], SchedulerOutput]] | None
+        ) = None
         if self.batch_queue_size > 1:
             logger.info("Batch queue is enabled with size %d", self.batch_queue_size)
             self.batch_queue = deque(maxlen=self.batch_queue_size)
 
-        self.request_block_hasher: Optional[Callable[[Request], list[BlockHash]]] = None
+        self.request_block_hasher: Callable[[Request], list[BlockHash]] | None = None
         if (
             self.vllm_config.cache_config.enable_prefix_caching
             or self.scheduler.get_kv_connector() is not None
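
The `batch_queue` annotation is the one spot where the rewrite is more than a token swap: a union that spans several lines loses the implicit grouping that `Optional[...]`'s brackets provided, so the new form wraps the whole type expression in parentheses. A contrived sketch of the same shape (hypothetical `Engine` class with stand-in output types):

```python
from collections import deque
from concurrent.futures import Future


class ModelOutput: ...  # hypothetical stand-ins for the real output types
class SchedulerOutput: ...


class Engine:
    def __init__(self, batch_queue_size: int) -> None:
        # Parentheses let the `... | None` union continue across lines,
        # replacing the brackets that `Optional[...]` used to provide.
        self.batch_queue: (
            deque[tuple[Future[ModelOutput], SchedulerOutput]] | None
        ) = None
        if batch_queue_size > 1:
            self.batch_queue = deque(maxlen=batch_queue_size)
```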
@@ -337,7 +337,7 @@ class EngineCore:
     def step_with_batch_queue(
         self,
-    ) -> tuple[Optional[dict[int, EngineCoreOutputs]], bool]:
+    ) -> tuple[dict[int, EngineCoreOutputs] | None, bool]:
         """Schedule and execute batches with the batch queue.
 
         Note that if nothing to output in this step, None is returned.
@@ -424,7 +424,7 @@ class EngineCore:
     def sleep(self, level: int = 1):
         self.model_executor.sleep(level)
 
-    def wake_up(self, tags: Optional[list[str]] = None):
+    def wake_up(self, tags: list[str] | None = None):
         self.model_executor.wake_up(tags)
 
     def is_sleeping(self) -> bool:
@@ -448,8 +448,8 @@ class EngineCore:
     def save_sharded_state(
         self,
         path: str,
-        pattern: Optional[str] = None,
-        max_size: Optional[int] = None,
+        pattern: str | None = None,
+        max_size: int | None = None,
     ) -> None:
         self.model_executor.save_sharded_state(
             path=path, pattern=pattern, max_size=max_size
@@ -457,10 +457,10 @@ class EngineCore:
     def collective_rpc(
         self,
-        method: Union[str, Callable[..., _R]],
-        timeout: Optional[float] = None,
+        method: str | Callable[..., _R],
+        timeout: float | None = None,
         args: tuple = (),
-        kwargs: Optional[dict[str, Any]] = None,
+        kwargs: dict[str, Any] | None = None,
     ) -> list[_R]:
         return self.model_executor.collective_rpc(method, timeout, args, kwargs)
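
In `collective_rpc` the union mixes a concrete type with a generic one: `str | Callable[..., _R]`, where `_R` is the `TypeVar` that ties the callable's return type to the `list[_R]` result. A hedged sketch of a dispatcher with the same signature shape (hypothetical `dispatch`, not the vLLM implementation):

```python
from collections.abc import Callable
from typing import Any, TypeVar

_R = TypeVar("_R")


def dispatch(
    target: Any,
    method: str | Callable[..., _R],
    args: tuple = (),
    kwargs: dict[str, Any] | None = None,
) -> _R:
    # Accept either a method name to look up on `target` or a callable
    # to invoke directly; the TypeVar threads the return type through.
    fn = getattr(target, method) if isinstance(method, str) else method
    return fn(*args, **(kwargs or {}))
```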
@@ -509,11 +509,11 @@ class EngineCoreProc(EngineCore):
         handshake_address: str,
         executor_class: type[Executor],
         log_stats: bool,
-        client_handshake_address: Optional[str] = None,
+        client_handshake_address: str | None = None,
         engine_index: int = 0,
     ):
         self.input_queue = queue.Queue[tuple[EngineCoreRequestType, Any]]()
-        self.output_queue = queue.Queue[Union[tuple[int, EngineCoreOutputs], bytes]]()
+        self.output_queue = queue.Queue[tuple[int, EngineCoreOutputs] | bytes]()
         executor_fail_callback = lambda: self.input_queue.put_nowait(
             (EngineCoreRequestType.EXECUTOR_FAILED, b"")
         )
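
The `output_queue` line deserves a note: unlike an annotation, `queue.Queue[...]()` subscripts the class and then instantiates it, so the `|` here is evaluated at runtime. `from __future__ import annotations` defers only annotations and would not help in this expression position; runtime `X | Y` on types needs Python 3.10+ (and a subscriptable `queue.Queue` needs 3.9+, per PEP 585), so this line presumes the project already requires at least Python 3.10. A minimal demonstration:

```python
import queue

# Expression position: the subscript and the `|` execute immediately.
# Requires Python 3.9+ (generic queue.Queue) and 3.10+ (runtime unions).
q = queue.Queue[tuple[int, str] | bytes]()

q.put((0, "engine output"))  # either member of the union is accepted
q.put(b"raw frame")
```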
@@ -606,7 +606,7 @@ class EngineCoreProc(EngineCore):
         identity: bytes,
         local_client: bool,
         vllm_config: VllmConfig,
-        client_handshake_address: Optional[str],
+        client_handshake_address: str | None,
     ) -> Generator[EngineZmqAddresses, None, None]:
         """
         Perform startup handshakes.
@@ -667,7 +667,7 @@ class EngineCoreProc(EngineCore):
         local_client: bool,
         headless: bool,
         vllm_config: VllmConfig,
-        parallel_config_to_update: Optional[ParallelConfig] = None,
+        parallel_config_to_update: ParallelConfig | None = None,
     ) -> Generator[EngineZmqAddresses, None, None]:
         with make_zmq_socket(
             ctx,
@@ -710,7 +710,7 @@ class EngineCoreProc(EngineCore):
         handshake_socket: zmq.Socket,
         local_client: bool,
         headless: bool,
-        parallel_config: Optional[ParallelConfig] = None,
+        parallel_config: ParallelConfig | None = None,
     ) -> EngineZmqAddresses:
         # Send registration message.
         handshake_socket.send(
@@ -765,7 +765,7 @@ class EngineCoreProc(EngineCore):
         signal.signal(signal.SIGTERM, signal_handler)
         signal.signal(signal.SIGINT, signal_handler)
 
-        engine_core: Optional[EngineCoreProc] = None
+        engine_core: EngineCoreProc | None = None
         try:
             parallel_config: ParallelConfig = kwargs["vllm_config"].parallel_config
             if parallel_config.data_parallel_size > 1 or dp_rank > 0:
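
The `engine_core` local shows the usual construct-then-clean-up pattern: initialize to `None`, assign inside `try`, and tear down afterwards only if construction succeeded. Since local variable annotations are never evaluated at runtime (PEP 526), the union costs nothing here. A hedged sketch with a hypothetical `Resource` in place of `EngineCoreProc`:

```python
class Resource:
    """Hypothetical stand-in for EngineCoreProc."""

    def shutdown(self) -> None: ...


def run() -> None:
    resource: Resource | None = None  # annotation not evaluated at runtime
    try:
        resource = Resource()
        # ... main loop would go here ...
    finally:
        # `None` distinguishes "never constructed" from "needs cleanup".
        if resource is not None:
            resource.shutdown()
```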
@@ -911,7 +911,7 @@ class EngineCoreProc(EngineCore):
     def process_input_sockets(
         self,
         input_addresses: list[str],
-        coord_input_address: Optional[str],
+        coord_input_address: str | None,
         identity: bytes,
         ready_event: threading.Event,
     ):
@@ -980,7 +980,7 @@ class EngineCoreProc(EngineCore):
     def process_output_sockets(
         self,
         output_paths: list[str],
-        coord_output_path: Optional[str],
+        coord_output_path: str | None,
         engine_index: int,
     ):
         """Output socket IO thread."""
@@ -1059,7 +1059,7 @@ class DPEngineCoreProc(EngineCoreProc):
         handshake_address: str,
         executor_class: type[Executor],
         log_stats: bool,
-        client_handshake_address: Optional[str] = None,
+        client_handshake_address: str | None = None,
     ):
         # Counts forward-passes of the model so that we can synchronize
         # finished with DP peers every N steps.
@@ -1332,7 +1332,7 @@ class DPEngineCoreActor(DPEngineCoreProc):
         identity: bytes,
         local_client: bool,
         vllm_config: VllmConfig,
-        client_handshake_address: Optional[str],
+        client_handshake_address: str | None,
     ):
         """
         For Ray, we don't need to actually perform handshake.