Update deprecated Python 3.8 typing (#13971)
This commit is contained in:
@@ -5,13 +5,13 @@
|
||||
import re
|
||||
import time
|
||||
from argparse import Namespace
|
||||
from typing import Any, ClassVar, Dict, List, Literal, Optional, Set, Union
|
||||
from typing import Annotated, Any, ClassVar, Literal, Optional, Union
|
||||
|
||||
import torch
|
||||
from fastapi import UploadFile
|
||||
from pydantic import (BaseModel, ConfigDict, Field, TypeAdapter,
|
||||
ValidationInfo, field_validator, model_validator)
|
||||
from typing_extensions import Annotated, TypeAlias
|
||||
from typing_extensions import TypeAlias
|
||||
|
||||
from vllm.entrypoints.chat_utils import ChatCompletionMessageParam
|
||||
from vllm.logger import init_logger
|
||||
@@ -47,7 +47,7 @@ class OpenAIBaseModel(BaseModel):
|
||||
model_config = ConfigDict(extra="allow")
|
||||
|
||||
# Cache class field names
|
||||
field_names: ClassVar[Optional[Set[str]]] = None
|
||||
field_names: ClassVar[Optional[set[str]]] = None
|
||||
|
||||
@model_validator(mode="wrap")
|
||||
@classmethod
|
||||
@@ -105,12 +105,12 @@ class ModelCard(OpenAIBaseModel):
|
||||
root: Optional[str] = None
|
||||
parent: Optional[str] = None
|
||||
max_model_len: Optional[int] = None
|
||||
permission: List[ModelPermission] = Field(default_factory=list)
|
||||
permission: list[ModelPermission] = Field(default_factory=list)
|
||||
|
||||
|
||||
class ModelList(OpenAIBaseModel):
|
||||
object: str = "list"
|
||||
data: List[ModelCard] = Field(default_factory=list)
|
||||
data: list[ModelCard] = Field(default_factory=list)
|
||||
|
||||
|
||||
class PromptTokenUsageInfo(OpenAIBaseModel):
|
||||
@@ -134,7 +134,7 @@ class JsonSchemaResponseFormat(OpenAIBaseModel):
|
||||
description: Optional[str] = None
|
||||
# schema is the field in openai but that causes conflicts with pydantic so
|
||||
# instead use json_schema with an alias
|
||||
json_schema: Optional[Dict[str, Any]] = Field(default=None, alias='schema')
|
||||
json_schema: Optional[dict[str, Any]] = Field(default=None, alias='schema')
|
||||
strict: Optional[bool] = None
|
||||
|
||||
|
||||
@@ -152,7 +152,7 @@ class StreamOptions(OpenAIBaseModel):
|
||||
class FunctionDefinition(OpenAIBaseModel):
|
||||
name: str
|
||||
description: Optional[str] = None
|
||||
parameters: Optional[Dict[str, Any]] = None
|
||||
parameters: Optional[dict[str, Any]] = None
|
||||
|
||||
|
||||
class ChatCompletionToolsParam(OpenAIBaseModel):
|
||||
@@ -171,15 +171,15 @@ class ChatCompletionNamedToolChoiceParam(OpenAIBaseModel):
|
||||
|
||||
class LogitsProcessorConstructor(BaseModel):
|
||||
qualname: str
|
||||
args: Optional[List[Any]] = None
|
||||
kwargs: Optional[Dict[str, Any]] = None
|
||||
args: Optional[list[Any]] = None
|
||||
kwargs: Optional[dict[str, Any]] = None
|
||||
|
||||
|
||||
LogitsProcessors = List[Union[str, LogitsProcessorConstructor]]
|
||||
LogitsProcessors = list[Union[str, LogitsProcessorConstructor]]
|
||||
|
||||
|
||||
def get_logits_processors(processors: Optional[LogitsProcessors],
|
||||
pattern: Optional[str]) -> Optional[List[Any]]:
|
||||
pattern: Optional[str]) -> Optional[list[Any]]:
|
||||
if processors and pattern:
|
||||
logits_processors = []
|
||||
for processor in processors:
|
||||
@@ -212,10 +212,10 @@ def get_logits_processors(processors: Optional[LogitsProcessors],
|
||||
class ChatCompletionRequest(OpenAIBaseModel):
|
||||
# Ordered by official OpenAI API documentation
|
||||
# https://platform.openai.com/docs/api-reference/chat/create
|
||||
messages: List[ChatCompletionMessageParam]
|
||||
messages: list[ChatCompletionMessageParam]
|
||||
model: Optional[str] = None
|
||||
frequency_penalty: Optional[float] = 0.0
|
||||
logit_bias: Optional[Dict[str, float]] = None
|
||||
logit_bias: Optional[dict[str, float]] = None
|
||||
logprobs: Optional[bool] = False
|
||||
top_logprobs: Optional[int] = 0
|
||||
# TODO(#9845): remove max_tokens when field is removed from OpenAI API
|
||||
@@ -228,12 +228,12 @@ class ChatCompletionRequest(OpenAIBaseModel):
|
||||
presence_penalty: Optional[float] = 0.0
|
||||
response_format: Optional[ResponseFormat] = None
|
||||
seed: Optional[int] = Field(None, ge=_LONG_INFO.min, le=_LONG_INFO.max)
|
||||
stop: Optional[Union[str, List[str]]] = Field(default_factory=list)
|
||||
stop: Optional[Union[str, list[str]]] = Field(default_factory=list)
|
||||
stream: Optional[bool] = False
|
||||
stream_options: Optional[StreamOptions] = None
|
||||
temperature: Optional[float] = None
|
||||
top_p: Optional[float] = None
|
||||
tools: Optional[List[ChatCompletionToolsParam]] = None
|
||||
tools: Optional[list[ChatCompletionToolsParam]] = None
|
||||
tool_choice: Optional[Union[Literal["none"], Literal["auto"],
|
||||
ChatCompletionNamedToolChoiceParam]] = "none"
|
||||
|
||||
@@ -248,7 +248,7 @@ class ChatCompletionRequest(OpenAIBaseModel):
|
||||
min_p: Optional[float] = None
|
||||
repetition_penalty: Optional[float] = None
|
||||
length_penalty: float = 1.0
|
||||
stop_token_ids: Optional[List[int]] = Field(default_factory=list)
|
||||
stop_token_ids: Optional[list[int]] = Field(default_factory=list)
|
||||
include_stop_str_in_output: bool = False
|
||||
ignore_eos: bool = False
|
||||
min_tokens: int = 0
|
||||
@@ -290,7 +290,7 @@ class ChatCompletionRequest(OpenAIBaseModel):
|
||||
"special tokens so this should be set to false (as is the "
|
||||
"default)."),
|
||||
)
|
||||
documents: Optional[List[Dict[str, str]]] = Field(
|
||||
documents: Optional[list[dict[str, str]]] = Field(
|
||||
default=None,
|
||||
description=
|
||||
("A list of dicts representing documents that will be accessible to "
|
||||
@@ -307,12 +307,12 @@ class ChatCompletionRequest(OpenAIBaseModel):
|
||||
"allowed, so you must provide a chat template if the tokenizer "
|
||||
"does not define one."),
|
||||
)
|
||||
chat_template_kwargs: Optional[Dict[str, Any]] = Field(
|
||||
chat_template_kwargs: Optional[dict[str, Any]] = Field(
|
||||
default=None,
|
||||
description=("Additional kwargs to pass to the template renderer. "
|
||||
"Will be accessible by the chat template."),
|
||||
)
|
||||
mm_processor_kwargs: Optional[Dict[str, Any]] = Field(
|
||||
mm_processor_kwargs: Optional[dict[str, Any]] = Field(
|
||||
default=None,
|
||||
description=("Additional kwargs to pass to the HF processor."),
|
||||
)
|
||||
@@ -325,7 +325,7 @@ class ChatCompletionRequest(OpenAIBaseModel):
|
||||
description=(
|
||||
"If specified, the output will follow the regex pattern."),
|
||||
)
|
||||
guided_choice: Optional[List[str]] = Field(
|
||||
guided_choice: Optional[list[str]] = Field(
|
||||
default=None,
|
||||
description=(
|
||||
"If specified, the output will be exactly one of the choices."),
|
||||
@@ -643,17 +643,17 @@ class CompletionRequest(OpenAIBaseModel):
|
||||
# Ordered by official OpenAI API documentation
|
||||
# https://platform.openai.com/docs/api-reference/completions/create
|
||||
model: Optional[str] = None
|
||||
prompt: Union[List[int], List[List[int]], str, List[str]]
|
||||
prompt: Union[list[int], list[list[int]], str, list[str]]
|
||||
best_of: Optional[int] = None
|
||||
echo: Optional[bool] = False
|
||||
frequency_penalty: Optional[float] = 0.0
|
||||
logit_bias: Optional[Dict[str, float]] = None
|
||||
logit_bias: Optional[dict[str, float]] = None
|
||||
logprobs: Optional[int] = None
|
||||
max_tokens: Optional[int] = 16
|
||||
n: int = 1
|
||||
presence_penalty: Optional[float] = 0.0
|
||||
seed: Optional[int] = Field(None, ge=_LONG_INFO.min, le=_LONG_INFO.max)
|
||||
stop: Optional[Union[str, List[str]]] = Field(default_factory=list)
|
||||
stop: Optional[Union[str, list[str]]] = Field(default_factory=list)
|
||||
stream: Optional[bool] = False
|
||||
stream_options: Optional[StreamOptions] = None
|
||||
suffix: Optional[str] = None
|
||||
@@ -667,14 +667,14 @@ class CompletionRequest(OpenAIBaseModel):
|
||||
min_p: Optional[float] = None
|
||||
repetition_penalty: Optional[float] = None
|
||||
length_penalty: float = 1.0
|
||||
stop_token_ids: Optional[List[int]] = Field(default_factory=list)
|
||||
stop_token_ids: Optional[list[int]] = Field(default_factory=list)
|
||||
include_stop_str_in_output: bool = False
|
||||
ignore_eos: bool = False
|
||||
min_tokens: int = 0
|
||||
skip_special_tokens: bool = True
|
||||
spaces_between_special_tokens: bool = True
|
||||
truncate_prompt_tokens: Optional[Annotated[int, Field(ge=1)]] = None
|
||||
allowed_token_ids: Optional[List[int]] = None
|
||||
allowed_token_ids: Optional[list[int]] = None
|
||||
prompt_logprobs: Optional[int] = None
|
||||
# doc: end-completion-sampling-params
|
||||
|
||||
@@ -701,7 +701,7 @@ class CompletionRequest(OpenAIBaseModel):
|
||||
description=(
|
||||
"If specified, the output will follow the regex pattern."),
|
||||
)
|
||||
guided_choice: Optional[List[str]] = Field(
|
||||
guided_choice: Optional[list[str]] = Field(
|
||||
default=None,
|
||||
description=(
|
||||
"If specified, the output will be exactly one of the choices."),
|
||||
@@ -908,7 +908,7 @@ class EmbeddingCompletionRequest(OpenAIBaseModel):
|
||||
# Ordered by official OpenAI API documentation
|
||||
# https://platform.openai.com/docs/api-reference/embeddings
|
||||
model: Optional[str] = None
|
||||
input: Union[List[int], List[List[int]], str, List[str]]
|
||||
input: Union[list[int], list[list[int]], str, list[str]]
|
||||
encoding_format: Literal["float", "base64"] = "float"
|
||||
dimensions: Optional[int] = None
|
||||
user: Optional[str] = None
|
||||
@@ -940,7 +940,7 @@ class EmbeddingCompletionRequest(OpenAIBaseModel):
|
||||
|
||||
class EmbeddingChatRequest(OpenAIBaseModel):
|
||||
model: Optional[str] = None
|
||||
messages: List[ChatCompletionMessageParam]
|
||||
messages: list[ChatCompletionMessageParam]
|
||||
|
||||
encoding_format: Literal["float", "base64"] = "float"
|
||||
dimensions: Optional[int] = None
|
||||
@@ -969,12 +969,12 @@ class EmbeddingChatRequest(OpenAIBaseModel):
|
||||
"allowed, so you must provide a chat template if the tokenizer "
|
||||
"does not define one."),
|
||||
)
|
||||
chat_template_kwargs: Optional[Dict[str, Any]] = Field(
|
||||
chat_template_kwargs: Optional[dict[str, Any]] = Field(
|
||||
default=None,
|
||||
description=("Additional kwargs to pass to the template renderer. "
|
||||
"Will be accessible by the chat template."),
|
||||
)
|
||||
mm_processor_kwargs: Optional[Dict[str, Any]] = Field(
|
||||
mm_processor_kwargs: Optional[dict[str, Any]] = Field(
|
||||
default=None,
|
||||
description=("Additional kwargs to pass to the HF processor."),
|
||||
)
|
||||
@@ -1008,8 +1008,8 @@ PoolingRequest = Union[PoolingCompletionRequest, PoolingChatRequest]
|
||||
|
||||
class ScoreRequest(OpenAIBaseModel):
|
||||
model: Optional[str] = None
|
||||
text_1: Union[List[str], str]
|
||||
text_2: Union[List[str], str]
|
||||
text_1: Union[list[str], str]
|
||||
text_2: Union[list[str], str]
|
||||
truncate_prompt_tokens: Optional[Annotated[int, Field(ge=1)]] = None
|
||||
|
||||
# doc: begin-score-pooling-params
|
||||
@@ -1033,7 +1033,7 @@ class ScoreRequest(OpenAIBaseModel):
|
||||
class RerankRequest(OpenAIBaseModel):
|
||||
model: Optional[str] = None
|
||||
query: str
|
||||
documents: List[str]
|
||||
documents: list[str]
|
||||
top_n: int = Field(default_factory=lambda: 0)
|
||||
truncate_prompt_tokens: Optional[Annotated[int, Field(ge=1)]] = None
|
||||
|
||||
@@ -1073,14 +1073,14 @@ class RerankResponse(OpenAIBaseModel):
|
||||
id: str
|
||||
model: str
|
||||
usage: RerankUsage
|
||||
results: List[RerankResult]
|
||||
results: list[RerankResult]
|
||||
|
||||
|
||||
class CompletionLogProbs(OpenAIBaseModel):
|
||||
text_offset: List[int] = Field(default_factory=list)
|
||||
token_logprobs: List[Optional[float]] = Field(default_factory=list)
|
||||
tokens: List[str] = Field(default_factory=list)
|
||||
top_logprobs: List[Optional[Dict[str,
|
||||
text_offset: list[int] = Field(default_factory=list)
|
||||
token_logprobs: list[Optional[float]] = Field(default_factory=list)
|
||||
tokens: list[str] = Field(default_factory=list)
|
||||
top_logprobs: list[Optional[dict[str,
|
||||
float]]] = Field(default_factory=list)
|
||||
|
||||
|
||||
@@ -1096,7 +1096,7 @@ class CompletionResponseChoice(OpenAIBaseModel):
|
||||
"to stop, None if the completion finished for some other reason "
|
||||
"including encountering the EOS token"),
|
||||
)
|
||||
prompt_logprobs: Optional[List[Optional[Dict[int, Logprob]]]] = None
|
||||
prompt_logprobs: Optional[list[Optional[dict[int, Logprob]]]] = None
|
||||
|
||||
|
||||
class CompletionResponse(OpenAIBaseModel):
|
||||
@@ -1104,7 +1104,7 @@ class CompletionResponse(OpenAIBaseModel):
|
||||
object: str = "text_completion"
|
||||
created: int = Field(default_factory=lambda: int(time.time()))
|
||||
model: str
|
||||
choices: List[CompletionResponseChoice]
|
||||
choices: list[CompletionResponseChoice]
|
||||
usage: UsageInfo
|
||||
|
||||
|
||||
@@ -1127,14 +1127,14 @@ class CompletionStreamResponse(OpenAIBaseModel):
|
||||
object: str = "text_completion"
|
||||
created: int = Field(default_factory=lambda: int(time.time()))
|
||||
model: str
|
||||
choices: List[CompletionResponseStreamChoice]
|
||||
choices: list[CompletionResponseStreamChoice]
|
||||
usage: Optional[UsageInfo] = Field(default=None)
|
||||
|
||||
|
||||
class EmbeddingResponseData(OpenAIBaseModel):
|
||||
index: int
|
||||
object: str = "embedding"
|
||||
embedding: Union[List[float], str]
|
||||
embedding: Union[list[float], str]
|
||||
|
||||
|
||||
class EmbeddingResponse(OpenAIBaseModel):
|
||||
@@ -1142,14 +1142,14 @@ class EmbeddingResponse(OpenAIBaseModel):
|
||||
object: str = "list"
|
||||
created: int = Field(default_factory=lambda: int(time.time()))
|
||||
model: str
|
||||
data: List[EmbeddingResponseData]
|
||||
data: list[EmbeddingResponseData]
|
||||
usage: UsageInfo
|
||||
|
||||
|
||||
class PoolingResponseData(OpenAIBaseModel):
|
||||
index: int
|
||||
object: str = "pooling"
|
||||
data: Union[List[List[float]], List[float], str]
|
||||
data: Union[list[list[float]], list[float], str]
|
||||
|
||||
|
||||
class PoolingResponse(OpenAIBaseModel):
|
||||
@@ -1157,7 +1157,7 @@ class PoolingResponse(OpenAIBaseModel):
|
||||
object: str = "list"
|
||||
created: int = Field(default_factory=lambda: int(time.time()))
|
||||
model: str
|
||||
data: List[PoolingResponseData]
|
||||
data: list[PoolingResponseData]
|
||||
usage: UsageInfo
|
||||
|
||||
|
||||
@@ -1172,7 +1172,7 @@ class ScoreResponse(OpenAIBaseModel):
|
||||
object: str = "list"
|
||||
created: int = Field(default_factory=lambda: int(time.time()))
|
||||
model: str
|
||||
data: List[ScoreResponseData]
|
||||
data: list[ScoreResponseData]
|
||||
usage: UsageInfo
|
||||
|
||||
|
||||
@@ -1205,7 +1205,7 @@ class ExtractedToolCallInformation(BaseModel):
|
||||
tools_called: bool
|
||||
|
||||
# extracted tool calls
|
||||
tool_calls: List[ToolCall]
|
||||
tool_calls: list[ToolCall]
|
||||
|
||||
# content - per OpenAI spec, content AND tool calls can be returned rarely
|
||||
# But some models will do this intentionally
|
||||
@@ -1216,21 +1216,21 @@ class ChatMessage(OpenAIBaseModel):
|
||||
role: str
|
||||
reasoning_content: Optional[str] = None
|
||||
content: Optional[str] = None
|
||||
tool_calls: List[ToolCall] = Field(default_factory=list)
|
||||
tool_calls: list[ToolCall] = Field(default_factory=list)
|
||||
|
||||
|
||||
class ChatCompletionLogProb(OpenAIBaseModel):
|
||||
token: str
|
||||
logprob: float = -9999.0
|
||||
bytes: Optional[List[int]] = None
|
||||
bytes: Optional[list[int]] = None
|
||||
|
||||
|
||||
class ChatCompletionLogProbsContent(ChatCompletionLogProb):
|
||||
top_logprobs: List[ChatCompletionLogProb] = Field(default_factory=list)
|
||||
top_logprobs: list[ChatCompletionLogProb] = Field(default_factory=list)
|
||||
|
||||
|
||||
class ChatCompletionLogProbs(OpenAIBaseModel):
|
||||
content: Optional[List[ChatCompletionLogProbsContent]] = None
|
||||
content: Optional[list[ChatCompletionLogProbsContent]] = None
|
||||
|
||||
|
||||
class ChatCompletionResponseChoice(OpenAIBaseModel):
|
||||
@@ -1248,16 +1248,16 @@ class ChatCompletionResponse(OpenAIBaseModel):
|
||||
object: Literal["chat.completion"] = "chat.completion"
|
||||
created: int = Field(default_factory=lambda: int(time.time()))
|
||||
model: str
|
||||
choices: List[ChatCompletionResponseChoice]
|
||||
choices: list[ChatCompletionResponseChoice]
|
||||
usage: UsageInfo
|
||||
prompt_logprobs: Optional[List[Optional[Dict[int, Logprob]]]] = None
|
||||
prompt_logprobs: Optional[list[Optional[dict[int, Logprob]]]] = None
|
||||
|
||||
|
||||
class DeltaMessage(OpenAIBaseModel):
|
||||
role: Optional[str] = None
|
||||
content: Optional[str] = None
|
||||
reasoning_content: Optional[str] = None
|
||||
tool_calls: List[DeltaToolCall] = Field(default_factory=list)
|
||||
tool_calls: list[DeltaToolCall] = Field(default_factory=list)
|
||||
|
||||
|
||||
class ChatCompletionResponseStreamChoice(OpenAIBaseModel):
|
||||
@@ -1273,7 +1273,7 @@ class ChatCompletionStreamResponse(OpenAIBaseModel):
|
||||
object: Literal["chat.completion.chunk"] = "chat.completion.chunk"
|
||||
created: int = Field(default_factory=lambda: int(time.time()))
|
||||
model: str
|
||||
choices: List[ChatCompletionResponseStreamChoice]
|
||||
choices: list[ChatCompletionResponseStreamChoice]
|
||||
usage: Optional[UsageInfo] = Field(default=None)
|
||||
|
||||
|
||||
@@ -1358,7 +1358,7 @@ class TokenizeCompletionRequest(OpenAIBaseModel):
|
||||
|
||||
class TokenizeChatRequest(OpenAIBaseModel):
|
||||
model: Optional[str] = None
|
||||
messages: List[ChatCompletionMessageParam]
|
||||
messages: list[ChatCompletionMessageParam]
|
||||
|
||||
add_generation_prompt: bool = Field(
|
||||
default=True,
|
||||
@@ -1393,12 +1393,12 @@ class TokenizeChatRequest(OpenAIBaseModel):
|
||||
"allowed, so you must provide a chat template if the tokenizer "
|
||||
"does not define one."),
|
||||
)
|
||||
chat_template_kwargs: Optional[Dict[str, Any]] = Field(
|
||||
chat_template_kwargs: Optional[dict[str, Any]] = Field(
|
||||
default=None,
|
||||
description=("Additional kwargs to pass to the template renderer. "
|
||||
"Will be accessible by the chat template."),
|
||||
)
|
||||
mm_processor_kwargs: Optional[Dict[str, Any]] = Field(
|
||||
mm_processor_kwargs: Optional[dict[str, Any]] = Field(
|
||||
default=None,
|
||||
description=("Additional kwargs to pass to the HF processor."),
|
||||
)
|
||||
@@ -1419,12 +1419,12 @@ TokenizeRequest = Union[TokenizeCompletionRequest, TokenizeChatRequest]
|
||||
class TokenizeResponse(OpenAIBaseModel):
|
||||
count: int
|
||||
max_model_len: int
|
||||
tokens: List[int]
|
||||
tokens: list[int]
|
||||
|
||||
|
||||
class DetokenizeRequest(OpenAIBaseModel):
|
||||
model: Optional[str] = None
|
||||
tokens: List[int]
|
||||
tokens: list[int]
|
||||
|
||||
|
||||
class DetokenizeResponse(OpenAIBaseModel):
|
||||
@@ -1492,7 +1492,7 @@ class TranscriptionRequest(OpenAIBaseModel):
|
||||
to automatically increase the temperature until certain thresholds are hit.
|
||||
"""
|
||||
|
||||
timestamp_granularities: List[Literal["word", "segment"]] = Field(
|
||||
timestamp_granularities: list[Literal["word", "segment"]] = Field(
|
||||
alias="timestamp_granularities[]", default=[])
|
||||
"""The timestamp granularities to populate for this transcription.
|
||||
|
||||
@@ -1580,7 +1580,7 @@ class TranscriptionSegment(OpenAIBaseModel):
|
||||
text: str
|
||||
"""Text content of the segment."""
|
||||
|
||||
tokens: List[int]
|
||||
tokens: list[int]
|
||||
"""Array of token IDs for the text content."""
|
||||
|
||||
|
||||
@@ -1594,8 +1594,8 @@ class TranscriptionResponseVerbose(OpenAIBaseModel):
|
||||
text: str
|
||||
"""The transcribed text."""
|
||||
|
||||
segments: Optional[List[TranscriptionSegment]] = None
|
||||
segments: Optional[list[TranscriptionSegment]] = None
|
||||
"""Segments of the transcribed text and their corresponding details."""
|
||||
|
||||
words: Optional[List[TranscriptionWord]] = None
|
||||
words: Optional[list[TranscriptionWord]] = None
|
||||
"""Extracted words and their corresponding timestamps."""
|
||||
|
||||
Reference in New Issue
Block a user