Update deprecated Python 3.8 typing (#13971)

This commit is contained in:
Harry Mellor
2025-03-03 01:34:51 +00:00
committed by GitHub
parent bf33700ecd
commit cf069aa8aa
300 changed files with 2294 additions and 2347 deletions

View File

@@ -5,13 +5,13 @@
import re
import time
from argparse import Namespace
from typing import Any, ClassVar, Dict, List, Literal, Optional, Set, Union
from typing import Annotated, Any, ClassVar, Literal, Optional, Union
import torch
from fastapi import UploadFile
from pydantic import (BaseModel, ConfigDict, Field, TypeAdapter,
ValidationInfo, field_validator, model_validator)
from typing_extensions import Annotated, TypeAlias
from typing_extensions import TypeAlias
from vllm.entrypoints.chat_utils import ChatCompletionMessageParam
from vllm.logger import init_logger
@@ -47,7 +47,7 @@ class OpenAIBaseModel(BaseModel):
model_config = ConfigDict(extra="allow")
# Cache class field names
field_names: ClassVar[Optional[Set[str]]] = None
field_names: ClassVar[Optional[set[str]]] = None
@model_validator(mode="wrap")
@classmethod
@@ -105,12 +105,12 @@ class ModelCard(OpenAIBaseModel):
root: Optional[str] = None
parent: Optional[str] = None
max_model_len: Optional[int] = None
permission: List[ModelPermission] = Field(default_factory=list)
permission: list[ModelPermission] = Field(default_factory=list)
class ModelList(OpenAIBaseModel):
object: str = "list"
data: List[ModelCard] = Field(default_factory=list)
data: list[ModelCard] = Field(default_factory=list)
class PromptTokenUsageInfo(OpenAIBaseModel):
@@ -134,7 +134,7 @@ class JsonSchemaResponseFormat(OpenAIBaseModel):
description: Optional[str] = None
# schema is the field in openai but that causes conflicts with pydantic so
# instead use json_schema with an alias
json_schema: Optional[Dict[str, Any]] = Field(default=None, alias='schema')
json_schema: Optional[dict[str, Any]] = Field(default=None, alias='schema')
strict: Optional[bool] = None
@@ -152,7 +152,7 @@ class StreamOptions(OpenAIBaseModel):
class FunctionDefinition(OpenAIBaseModel):
name: str
description: Optional[str] = None
parameters: Optional[Dict[str, Any]] = None
parameters: Optional[dict[str, Any]] = None
class ChatCompletionToolsParam(OpenAIBaseModel):
@@ -171,15 +171,15 @@ class ChatCompletionNamedToolChoiceParam(OpenAIBaseModel):
class LogitsProcessorConstructor(BaseModel):
qualname: str
args: Optional[List[Any]] = None
kwargs: Optional[Dict[str, Any]] = None
args: Optional[list[Any]] = None
kwargs: Optional[dict[str, Any]] = None
LogitsProcessors = List[Union[str, LogitsProcessorConstructor]]
LogitsProcessors = list[Union[str, LogitsProcessorConstructor]]
def get_logits_processors(processors: Optional[LogitsProcessors],
pattern: Optional[str]) -> Optional[List[Any]]:
pattern: Optional[str]) -> Optional[list[Any]]:
if processors and pattern:
logits_processors = []
for processor in processors:
@@ -212,10 +212,10 @@ def get_logits_processors(processors: Optional[LogitsProcessors],
class ChatCompletionRequest(OpenAIBaseModel):
# Ordered by official OpenAI API documentation
# https://platform.openai.com/docs/api-reference/chat/create
messages: List[ChatCompletionMessageParam]
messages: list[ChatCompletionMessageParam]
model: Optional[str] = None
frequency_penalty: Optional[float] = 0.0
logit_bias: Optional[Dict[str, float]] = None
logit_bias: Optional[dict[str, float]] = None
logprobs: Optional[bool] = False
top_logprobs: Optional[int] = 0
# TODO(#9845): remove max_tokens when field is removed from OpenAI API
@@ -228,12 +228,12 @@ class ChatCompletionRequest(OpenAIBaseModel):
presence_penalty: Optional[float] = 0.0
response_format: Optional[ResponseFormat] = None
seed: Optional[int] = Field(None, ge=_LONG_INFO.min, le=_LONG_INFO.max)
stop: Optional[Union[str, List[str]]] = Field(default_factory=list)
stop: Optional[Union[str, list[str]]] = Field(default_factory=list)
stream: Optional[bool] = False
stream_options: Optional[StreamOptions] = None
temperature: Optional[float] = None
top_p: Optional[float] = None
tools: Optional[List[ChatCompletionToolsParam]] = None
tools: Optional[list[ChatCompletionToolsParam]] = None
tool_choice: Optional[Union[Literal["none"], Literal["auto"],
ChatCompletionNamedToolChoiceParam]] = "none"
@@ -248,7 +248,7 @@ class ChatCompletionRequest(OpenAIBaseModel):
min_p: Optional[float] = None
repetition_penalty: Optional[float] = None
length_penalty: float = 1.0
stop_token_ids: Optional[List[int]] = Field(default_factory=list)
stop_token_ids: Optional[list[int]] = Field(default_factory=list)
include_stop_str_in_output: bool = False
ignore_eos: bool = False
min_tokens: int = 0
@@ -290,7 +290,7 @@ class ChatCompletionRequest(OpenAIBaseModel):
"special tokens so this should be set to false (as is the "
"default)."),
)
documents: Optional[List[Dict[str, str]]] = Field(
documents: Optional[list[dict[str, str]]] = Field(
default=None,
description=
("A list of dicts representing documents that will be accessible to "
@@ -307,12 +307,12 @@ class ChatCompletionRequest(OpenAIBaseModel):
"allowed, so you must provide a chat template if the tokenizer "
"does not define one."),
)
chat_template_kwargs: Optional[Dict[str, Any]] = Field(
chat_template_kwargs: Optional[dict[str, Any]] = Field(
default=None,
description=("Additional kwargs to pass to the template renderer. "
"Will be accessible by the chat template."),
)
mm_processor_kwargs: Optional[Dict[str, Any]] = Field(
mm_processor_kwargs: Optional[dict[str, Any]] = Field(
default=None,
description=("Additional kwargs to pass to the HF processor."),
)
@@ -325,7 +325,7 @@ class ChatCompletionRequest(OpenAIBaseModel):
description=(
"If specified, the output will follow the regex pattern."),
)
guided_choice: Optional[List[str]] = Field(
guided_choice: Optional[list[str]] = Field(
default=None,
description=(
"If specified, the output will be exactly one of the choices."),
@@ -643,17 +643,17 @@ class CompletionRequest(OpenAIBaseModel):
# Ordered by official OpenAI API documentation
# https://platform.openai.com/docs/api-reference/completions/create
model: Optional[str] = None
prompt: Union[List[int], List[List[int]], str, List[str]]
prompt: Union[list[int], list[list[int]], str, list[str]]
best_of: Optional[int] = None
echo: Optional[bool] = False
frequency_penalty: Optional[float] = 0.0
logit_bias: Optional[Dict[str, float]] = None
logit_bias: Optional[dict[str, float]] = None
logprobs: Optional[int] = None
max_tokens: Optional[int] = 16
n: int = 1
presence_penalty: Optional[float] = 0.0
seed: Optional[int] = Field(None, ge=_LONG_INFO.min, le=_LONG_INFO.max)
stop: Optional[Union[str, List[str]]] = Field(default_factory=list)
stop: Optional[Union[str, list[str]]] = Field(default_factory=list)
stream: Optional[bool] = False
stream_options: Optional[StreamOptions] = None
suffix: Optional[str] = None
@@ -667,14 +667,14 @@ class CompletionRequest(OpenAIBaseModel):
min_p: Optional[float] = None
repetition_penalty: Optional[float] = None
length_penalty: float = 1.0
stop_token_ids: Optional[List[int]] = Field(default_factory=list)
stop_token_ids: Optional[list[int]] = Field(default_factory=list)
include_stop_str_in_output: bool = False
ignore_eos: bool = False
min_tokens: int = 0
skip_special_tokens: bool = True
spaces_between_special_tokens: bool = True
truncate_prompt_tokens: Optional[Annotated[int, Field(ge=1)]] = None
allowed_token_ids: Optional[List[int]] = None
allowed_token_ids: Optional[list[int]] = None
prompt_logprobs: Optional[int] = None
# doc: end-completion-sampling-params
@@ -701,7 +701,7 @@ class CompletionRequest(OpenAIBaseModel):
description=(
"If specified, the output will follow the regex pattern."),
)
guided_choice: Optional[List[str]] = Field(
guided_choice: Optional[list[str]] = Field(
default=None,
description=(
"If specified, the output will be exactly one of the choices."),
@@ -908,7 +908,7 @@ class EmbeddingCompletionRequest(OpenAIBaseModel):
# Ordered by official OpenAI API documentation
# https://platform.openai.com/docs/api-reference/embeddings
model: Optional[str] = None
input: Union[List[int], List[List[int]], str, List[str]]
input: Union[list[int], list[list[int]], str, list[str]]
encoding_format: Literal["float", "base64"] = "float"
dimensions: Optional[int] = None
user: Optional[str] = None
@@ -940,7 +940,7 @@ class EmbeddingCompletionRequest(OpenAIBaseModel):
class EmbeddingChatRequest(OpenAIBaseModel):
model: Optional[str] = None
messages: List[ChatCompletionMessageParam]
messages: list[ChatCompletionMessageParam]
encoding_format: Literal["float", "base64"] = "float"
dimensions: Optional[int] = None
@@ -969,12 +969,12 @@ class EmbeddingChatRequest(OpenAIBaseModel):
"allowed, so you must provide a chat template if the tokenizer "
"does not define one."),
)
chat_template_kwargs: Optional[Dict[str, Any]] = Field(
chat_template_kwargs: Optional[dict[str, Any]] = Field(
default=None,
description=("Additional kwargs to pass to the template renderer. "
"Will be accessible by the chat template."),
)
mm_processor_kwargs: Optional[Dict[str, Any]] = Field(
mm_processor_kwargs: Optional[dict[str, Any]] = Field(
default=None,
description=("Additional kwargs to pass to the HF processor."),
)
@@ -1008,8 +1008,8 @@ PoolingRequest = Union[PoolingCompletionRequest, PoolingChatRequest]
class ScoreRequest(OpenAIBaseModel):
model: Optional[str] = None
text_1: Union[List[str], str]
text_2: Union[List[str], str]
text_1: Union[list[str], str]
text_2: Union[list[str], str]
truncate_prompt_tokens: Optional[Annotated[int, Field(ge=1)]] = None
# doc: begin-score-pooling-params
@@ -1033,7 +1033,7 @@ class ScoreRequest(OpenAIBaseModel):
class RerankRequest(OpenAIBaseModel):
model: Optional[str] = None
query: str
documents: List[str]
documents: list[str]
top_n: int = Field(default_factory=lambda: 0)
truncate_prompt_tokens: Optional[Annotated[int, Field(ge=1)]] = None
@@ -1073,14 +1073,14 @@ class RerankResponse(OpenAIBaseModel):
id: str
model: str
usage: RerankUsage
results: List[RerankResult]
results: list[RerankResult]
class CompletionLogProbs(OpenAIBaseModel):
text_offset: List[int] = Field(default_factory=list)
token_logprobs: List[Optional[float]] = Field(default_factory=list)
tokens: List[str] = Field(default_factory=list)
top_logprobs: List[Optional[Dict[str,
text_offset: list[int] = Field(default_factory=list)
token_logprobs: list[Optional[float]] = Field(default_factory=list)
tokens: list[str] = Field(default_factory=list)
top_logprobs: list[Optional[dict[str,
float]]] = Field(default_factory=list)
@@ -1096,7 +1096,7 @@ class CompletionResponseChoice(OpenAIBaseModel):
"to stop, None if the completion finished for some other reason "
"including encountering the EOS token"),
)
prompt_logprobs: Optional[List[Optional[Dict[int, Logprob]]]] = None
prompt_logprobs: Optional[list[Optional[dict[int, Logprob]]]] = None
class CompletionResponse(OpenAIBaseModel):
@@ -1104,7 +1104,7 @@ class CompletionResponse(OpenAIBaseModel):
object: str = "text_completion"
created: int = Field(default_factory=lambda: int(time.time()))
model: str
choices: List[CompletionResponseChoice]
choices: list[CompletionResponseChoice]
usage: UsageInfo
@@ -1127,14 +1127,14 @@ class CompletionStreamResponse(OpenAIBaseModel):
object: str = "text_completion"
created: int = Field(default_factory=lambda: int(time.time()))
model: str
choices: List[CompletionResponseStreamChoice]
choices: list[CompletionResponseStreamChoice]
usage: Optional[UsageInfo] = Field(default=None)
class EmbeddingResponseData(OpenAIBaseModel):
index: int
object: str = "embedding"
embedding: Union[List[float], str]
embedding: Union[list[float], str]
class EmbeddingResponse(OpenAIBaseModel):
@@ -1142,14 +1142,14 @@ class EmbeddingResponse(OpenAIBaseModel):
object: str = "list"
created: int = Field(default_factory=lambda: int(time.time()))
model: str
data: List[EmbeddingResponseData]
data: list[EmbeddingResponseData]
usage: UsageInfo
class PoolingResponseData(OpenAIBaseModel):
index: int
object: str = "pooling"
data: Union[List[List[float]], List[float], str]
data: Union[list[list[float]], list[float], str]
class PoolingResponse(OpenAIBaseModel):
@@ -1157,7 +1157,7 @@ class PoolingResponse(OpenAIBaseModel):
object: str = "list"
created: int = Field(default_factory=lambda: int(time.time()))
model: str
data: List[PoolingResponseData]
data: list[PoolingResponseData]
usage: UsageInfo
@@ -1172,7 +1172,7 @@ class ScoreResponse(OpenAIBaseModel):
object: str = "list"
created: int = Field(default_factory=lambda: int(time.time()))
model: str
data: List[ScoreResponseData]
data: list[ScoreResponseData]
usage: UsageInfo
@@ -1205,7 +1205,7 @@ class ExtractedToolCallInformation(BaseModel):
tools_called: bool
# extracted tool calls
tool_calls: List[ToolCall]
tool_calls: list[ToolCall]
# content - per OpenAI spec, content AND tool calls can be returned rarely
# But some models will do this intentionally
@@ -1216,21 +1216,21 @@ class ChatMessage(OpenAIBaseModel):
role: str
reasoning_content: Optional[str] = None
content: Optional[str] = None
tool_calls: List[ToolCall] = Field(default_factory=list)
tool_calls: list[ToolCall] = Field(default_factory=list)
class ChatCompletionLogProb(OpenAIBaseModel):
token: str
logprob: float = -9999.0
bytes: Optional[List[int]] = None
bytes: Optional[list[int]] = None
class ChatCompletionLogProbsContent(ChatCompletionLogProb):
top_logprobs: List[ChatCompletionLogProb] = Field(default_factory=list)
top_logprobs: list[ChatCompletionLogProb] = Field(default_factory=list)
class ChatCompletionLogProbs(OpenAIBaseModel):
content: Optional[List[ChatCompletionLogProbsContent]] = None
content: Optional[list[ChatCompletionLogProbsContent]] = None
class ChatCompletionResponseChoice(OpenAIBaseModel):
@@ -1248,16 +1248,16 @@ class ChatCompletionResponse(OpenAIBaseModel):
object: Literal["chat.completion"] = "chat.completion"
created: int = Field(default_factory=lambda: int(time.time()))
model: str
choices: List[ChatCompletionResponseChoice]
choices: list[ChatCompletionResponseChoice]
usage: UsageInfo
prompt_logprobs: Optional[List[Optional[Dict[int, Logprob]]]] = None
prompt_logprobs: Optional[list[Optional[dict[int, Logprob]]]] = None
class DeltaMessage(OpenAIBaseModel):
role: Optional[str] = None
content: Optional[str] = None
reasoning_content: Optional[str] = None
tool_calls: List[DeltaToolCall] = Field(default_factory=list)
tool_calls: list[DeltaToolCall] = Field(default_factory=list)
class ChatCompletionResponseStreamChoice(OpenAIBaseModel):
@@ -1273,7 +1273,7 @@ class ChatCompletionStreamResponse(OpenAIBaseModel):
object: Literal["chat.completion.chunk"] = "chat.completion.chunk"
created: int = Field(default_factory=lambda: int(time.time()))
model: str
choices: List[ChatCompletionResponseStreamChoice]
choices: list[ChatCompletionResponseStreamChoice]
usage: Optional[UsageInfo] = Field(default=None)
@@ -1358,7 +1358,7 @@ class TokenizeCompletionRequest(OpenAIBaseModel):
class TokenizeChatRequest(OpenAIBaseModel):
model: Optional[str] = None
messages: List[ChatCompletionMessageParam]
messages: list[ChatCompletionMessageParam]
add_generation_prompt: bool = Field(
default=True,
@@ -1393,12 +1393,12 @@ class TokenizeChatRequest(OpenAIBaseModel):
"allowed, so you must provide a chat template if the tokenizer "
"does not define one."),
)
chat_template_kwargs: Optional[Dict[str, Any]] = Field(
chat_template_kwargs: Optional[dict[str, Any]] = Field(
default=None,
description=("Additional kwargs to pass to the template renderer. "
"Will be accessible by the chat template."),
)
mm_processor_kwargs: Optional[Dict[str, Any]] = Field(
mm_processor_kwargs: Optional[dict[str, Any]] = Field(
default=None,
description=("Additional kwargs to pass to the HF processor."),
)
@@ -1419,12 +1419,12 @@ TokenizeRequest = Union[TokenizeCompletionRequest, TokenizeChatRequest]
class TokenizeResponse(OpenAIBaseModel):
count: int
max_model_len: int
tokens: List[int]
tokens: list[int]
class DetokenizeRequest(OpenAIBaseModel):
model: Optional[str] = None
tokens: List[int]
tokens: list[int]
class DetokenizeResponse(OpenAIBaseModel):
@@ -1492,7 +1492,7 @@ class TranscriptionRequest(OpenAIBaseModel):
to automatically increase the temperature until certain thresholds are hit.
"""
timestamp_granularities: List[Literal["word", "segment"]] = Field(
timestamp_granularities: list[Literal["word", "segment"]] = Field(
alias="timestamp_granularities[]", default=[])
"""The timestamp granularities to populate for this transcription.
@@ -1580,7 +1580,7 @@ class TranscriptionSegment(OpenAIBaseModel):
text: str
"""Text content of the segment."""
tokens: List[int]
tokens: list[int]
"""Array of token IDs for the text content."""
@@ -1594,8 +1594,8 @@ class TranscriptionResponseVerbose(OpenAIBaseModel):
text: str
"""The transcribed text."""
segments: Optional[List[TranscriptionSegment]] = None
segments: Optional[list[TranscriptionSegment]] = None
"""Segments of the transcribed text and their corresponding details."""
words: Optional[List[TranscriptionWord]] = None
words: Optional[list[TranscriptionWord]] = None
"""Extracted words and their corresponding timestamps."""