Update deprecated Python 3.8 typing (#13971)

2025-03-03 01:34:51 +00:00
parent bf33700ecd
commit cf069aa8aa
300 changed files with 2294 additions and 2347 deletions
--- a/vllm/entrypoints/openai/serving_chat.py
+++ b/vllm/entrypoints/openai/serving_chat.py
@@ -3,10 +3,9 @@
 import asyncio
 import json
 import time
-from typing import (AsyncGenerator, AsyncIterator, Callable, Dict, Final, List,
-                    Optional)
-from typing import Sequence as GenericSequence
-from typing import Union
+from collections.abc import AsyncGenerator, AsyncIterator
+from collections.abc import Sequence as GenericSequence
+from typing import Callable, Final, Optional, Union

 from fastapi import Request

@@ -205,7 +204,7 @@ class OpenAIServingChat(OpenAIServing):
            raw_request.state.request_metadata = request_metadata

        # Schedule the request and get the result generator.
-        generators: List[AsyncGenerator[RequestOutput, None]] = []
+        generators: list[AsyncGenerator[RequestOutput, None]] = []
        try:
            for i, engine_prompt in enumerate(engine_prompts):
                sampling_params: Union[SamplingParams, BeamSearchParams]
@@ -282,7 +281,7 @@ class OpenAIServingChat(OpenAIServing):
        result_generator: AsyncIterator[RequestOutput],
        request_id: str,
        model_name: str,
-        conversation: List[ConversationMessage],
+        conversation: list[ConversationMessage],
        tokenizer: AnyTokenizer,
        request_metadata: RequestResponseMetadata,
    ) -> AsyncGenerator[str, None]:
@@ -310,7 +309,7 @@ class OpenAIServingChat(OpenAIServing):
        should_stream_with_reasoning_parsing = (
            self._should_stream_with_reasoning_parsing(request))

-        all_previous_token_ids: Optional[List[List[int]]]
+        all_previous_token_ids: Optional[list[list[int]]]

        # Only one of these will be used, thus previous_texts and
        # all_previous_token_ids will not be used twice in the same iteration.
@@ -339,7 +338,7 @@ class OpenAIServingChat(OpenAIServing):
        # Prepare the tool parser if it's needed
        try:
            if tool_choice_auto and self.tool_parser:
-                tool_parsers: List[Optional[ToolParser]] = [
+                tool_parsers: list[Optional[ToolParser]] = [
                    self.tool_parser(tokenizer)
                ] * num_choices
            else:
@@ -406,7 +405,7 @@ class OpenAIServingChat(OpenAIServing):
                    # Send response to echo the input portion of the
                    # last message
                    if request.echo:
-                        last_msg_content: Union[str, List[Dict[str, str]]] = ""
+                        last_msg_content: Union[str, list[dict[str, str]]] = ""
                        if conversation and "content" in conversation[
                                -1] and conversation[-1].get("role") == role:
                            last_msg_content = conversation[-1]["content"] or ""
@@ -674,7 +673,7 @@ class OpenAIServingChat(OpenAIServing):
        result_generator: AsyncIterator[RequestOutput],
        request_id: str,
        model_name: str,
-        conversation: List[ConversationMessage],
+        conversation: list[ConversationMessage],
        tokenizer: AnyTokenizer,
        request_metadata: RequestResponseMetadata,
    ) -> Union[ErrorResponse, ChatCompletionResponse]:
@@ -693,7 +692,7 @@ class OpenAIServingChat(OpenAIServing):

        assert final_res is not None

-        choices: List[ChatCompletionResponseChoice] = []
+        choices: list[ChatCompletionResponseChoice] = []

        role = self.get_chat_request_role(request)
        for output in final_res.outputs:
@@ -812,7 +811,7 @@ class OpenAIServingChat(OpenAIServing):
            choices.append(choice_data)

        if request.echo:
-            last_msg_content: Union[str, List[Dict[str, str]]] = ""
+            last_msg_content: Union[str, list[dict[str, str]]] = ""
            if conversation and "content" in conversation[-1] and conversation[
                    -1].get("role") == role:
                last_msg_content = conversation[-1]["content"] or ""
@@ -853,8 +852,8 @@ class OpenAIServingChat(OpenAIServing):
        return response

    def _get_top_logprobs(
-            self, logprobs: Dict[int, Logprob], top_logprobs: Optional[int],
-            tokenizer: AnyTokenizer) -> List[ChatCompletionLogProb]:
+            self, logprobs: dict[int, Logprob], top_logprobs: Optional[int],
+            tokenizer: AnyTokenizer) -> list[ChatCompletionLogProb]:
        return [
            ChatCompletionLogProb(token=(token := self._get_decoded_token(
                p[1],
@@ -871,12 +870,12 @@ class OpenAIServingChat(OpenAIServing):
    def _create_chat_logprobs(
        self,
        token_ids: GenericSequence[int],
-        top_logprobs: GenericSequence[Optional[Dict[int, Logprob]]],
+        top_logprobs: GenericSequence[Optional[dict[int, Logprob]]],
        tokenizer: AnyTokenizer,
        num_output_top_logprobs: Optional[int] = None,
    ) -> ChatCompletionLogProbs:
        """Create OpenAI-style logprobs."""
-        logprobs_content: List[ChatCompletionLogProbsContent] = []
+        logprobs_content: list[ChatCompletionLogProbsContent] = []

        for i, token_id in enumerate(token_ids):
            step_top_logprobs = top_logprobs[i]