Update Optional[x] -> x | None and Union[x, y] to x | y (#26633)

Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
Author:       Harry Mellor
Date:         2025-10-12 17:51:31 +01:00
Committed by: GitHub
Parent:       9bb38130cb
Commit:       8fcaaf6a16

944 changed files with 9490 additions and 10121 deletions

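The rewrite itself is mechanical: every `Optional[X]` annotation becomes `X | None` and every `Union[X, Y]` becomes `X | Y`, the PEP 604 union syntax, and the now-unused `Optional`/`Union` imports are dropped from the `typing` import lines. A minimal sketch of the pattern (the names here are hypothetical, not taken from this diff):

from typing import Optional, Union

# Before: pre-PEP 604 spelling, which needs Optional and Union from typing
def get_timeout(raw: Optional[str]) -> Union[int, float]:
    return int(raw) if raw is not None else 0.0

# After: PEP 604 unions; valid at runtime on Python 3.10+, or in annotations
# only via `from __future__ import annotations` on earlier versions
def get_timeout(raw: str | None) -> int | float:
    return int(raw) if raw is not None else 0.0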

@@ -16,10 +16,10 @@ import socket
 import tempfile
 import uuid
 from argparse import Namespace
-from collections.abc import AsyncGenerator, AsyncIterator, Awaitable
+from collections.abc import AsyncGenerator, AsyncIterator, Awaitable, Callable
 from contextlib import asynccontextmanager
 from http import HTTPStatus
-from typing import Annotated, Any, Callable, Literal, Optional
+from typing import Annotated, Any, Literal
 import prometheus_client
 import pydantic
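Beyond the Optional/Union rewrite, this first hunk also moves `Callable` from `typing` to `collections.abc`: the `typing` aliases of the abstract base classes are deprecated since Python 3.9 (PEP 585) in favor of the `collections.abc` originals, which subscript the same way. A minimal sketch of the new spelling, again with hypothetical names:

from collections.abc import Callable

# A handler maps a request id to a result dict, or None when there is no result.
Handler = Callable[[str], dict | None]

def dispatch(handler: Handler, request_id: str) -> dict | None:
    return handler(request_id)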
@@ -166,8 +166,8 @@ async def build_async_engine_client(
     args: Namespace,
     *,
     usage_context: UsageContext = UsageContext.OPENAI_API_SERVER,
-    disable_frontend_multiprocessing: Optional[bool] = None,
-    client_config: Optional[dict[str, Any]] = None,
+    disable_frontend_multiprocessing: bool | None = None,
+    client_config: dict[str, Any] | None = None,
 ) -> AsyncIterator[EngineClient]:
     if os.getenv("VLLM_WORKER_MULTIPROC_METHOD") == "forkserver":
         # The executor is expected to be mp.
@@ -203,7 +203,7 @@ async def build_async_engine_client_from_engine_args(
     *,
     usage_context: UsageContext = UsageContext.OPENAI_API_SERVER,
     disable_frontend_multiprocessing: bool = False,
-    client_config: Optional[dict[str, Any]] = None,
+    client_config: dict[str, Any] | None = None,
 ) -> AsyncIterator[EngineClient]:
     """
     Create EngineClient, either:
@@ -227,7 +227,7 @@ async def build_async_engine_client_from_engine_args(
     from vllm.v1.engine.async_llm import AsyncLLM
-    async_llm: Optional[AsyncLLM] = None
+    async_llm: AsyncLLM | None = None
     # Don't mutate the input client_config
     client_config = dict(client_config) if client_config else {}
@@ -308,35 +308,35 @@ def models(request: Request) -> OpenAIServingModels:
     return request.app.state.openai_serving_models
-def responses(request: Request) -> Optional[OpenAIServingResponses]:
+def responses(request: Request) -> OpenAIServingResponses | None:
     return request.app.state.openai_serving_responses
-def chat(request: Request) -> Optional[OpenAIServingChat]:
+def chat(request: Request) -> OpenAIServingChat | None:
     return request.app.state.openai_serving_chat
-def completion(request: Request) -> Optional[OpenAIServingCompletion]:
+def completion(request: Request) -> OpenAIServingCompletion | None:
     return request.app.state.openai_serving_completion
-def pooling(request: Request) -> Optional[OpenAIServingPooling]:
+def pooling(request: Request) -> OpenAIServingPooling | None:
     return request.app.state.openai_serving_pooling
-def embedding(request: Request) -> Optional[OpenAIServingEmbedding]:
+def embedding(request: Request) -> OpenAIServingEmbedding | None:
     return request.app.state.openai_serving_embedding
-def score(request: Request) -> Optional[ServingScores]:
+def score(request: Request) -> ServingScores | None:
     return request.app.state.openai_serving_scores
-def classify(request: Request) -> Optional[ServingClassification]:
+def classify(request: Request) -> ServingClassification | None:
     return request.app.state.openai_serving_classification
-def rerank(request: Request) -> Optional[ServingScores]:
+def rerank(request: Request) -> ServingScores | None:
     return request.app.state.openai_serving_scores
@@ -542,8 +542,8 @@ async def create_responses(request: ResponsesRequest, raw_request: Request):
 async def retrieve_responses(
     response_id: str,
     raw_request: Request,
-    starting_after: Optional[int] = None,
-    stream: Optional[bool] = False,
+    starting_after: int | None = None,
+    stream: bool | None = False,
 ):
     handler = responses(raw_request)
     if handler is None:
@@ -1039,7 +1039,7 @@ if envs.VLLM_SERVER_DEV_MODE:
         # User-defined `method` is responsible for deserialization if needed.
         args: list[str] = body.get("args", [])
         kwargs: dict[str, str] = body.get("kwargs", {})
-        timeout: Optional[float] = body.get("timeout")
+        timeout: float | None = body.get("timeout")
         results = await engine_client(raw_request).collective_rpc(
             method=method, timeout=timeout, args=tuple(args), kwargs=kwargs
         )
@@ -1120,7 +1120,7 @@ async def is_scaling_elastic_ep(raw_request: Request):
 # TODO: RequestType = TypeForm[BaseModel] when recognized by type checkers
 # (requires typing_extensions >= 4.13)
 RequestType = Any
-GetHandlerFn = Callable[[Request], Optional[OpenAIServing]]
+GetHandlerFn = Callable[[Request], OpenAIServing | None]
 EndpointFn = Callable[[RequestType, Request], Awaitable[Any]]
 # NOTE: Items defined earlier take higher priority
@@ -1236,7 +1236,7 @@ if envs.VLLM_ALLOW_RUNTIME_LORA_UPDATING:
         return Response(status_code=200, content=response)
-def load_log_config(log_config_file: Optional[str]) -> Optional[dict]:
+def load_log_config(log_config_file: str | None) -> dict | None:
     if not log_config_file:
         return None
     try:
@@ -1655,7 +1655,7 @@ async def init_app_state(
     )
     if args.tool_server == "demo":
-        tool_server: Optional[ToolServer] = DemoToolServer()
+        tool_server: ToolServer | None = DemoToolServer()
         assert isinstance(tool_server, DemoToolServer)
         await tool_server.init_and_validate()
     elif args.tool_server: