Update deprecated Python 3.8 typing (#13971)

2025-03-03 01:34:51 +00:00
parent bf33700ecd
commit cf069aa8aa
300 changed files with 2294 additions and 2347 deletions
--- a/vllm/v1/engine/core.py
+++ b/vllm/v1/engine/core.py
@@ -7,7 +7,7 @@ import time
 from concurrent.futures import Future
 from inspect import isclass, signature
 from multiprocessing.connection import Connection
-from typing import Any, List, Optional, Set, Tuple, Type
+from typing import Any, Optional

 import msgspec
 import psutil
@@ -42,7 +42,7 @@ class EngineCore:
    def __init__(
        self,
        vllm_config: VllmConfig,
-        executor_class: Type[Executor],
+        executor_class: type[Executor],
        log_stats: bool,
    ):
        assert vllm_config.model_config.runner_type != "pooling"
@@ -80,7 +80,7 @@ class EngineCore:
        # schedule and execute batches, and is required by pipeline parallelism
        # to eliminate pipeline bubbles.
        self.batch_queue_size = self.model_executor.max_concurrent_batches
-        self.batch_queue: Optional[queue.Queue[Tuple[Future[ModelRunnerOutput],
+        self.batch_queue: Optional[queue.Queue[tuple[Future[ModelRunnerOutput],
                                                     SchedulerOutput]]] = None
        if self.batch_queue_size > 1:
            logger.info("Batch queue is enabled with size %d",
@@ -88,7 +88,7 @@ class EngineCore:
            self.batch_queue = queue.Queue(self.batch_queue_size)

    def _initialize_kv_caches(self,
-                              vllm_config: VllmConfig) -> Tuple[int, int]:
+                              vllm_config: VllmConfig) -> tuple[int, int]:
        start = time.time()

        # Get all kv cache needed by the model
@@ -134,7 +134,7 @@ class EngineCore:

        self.scheduler.add_request(req)

-    def abort_requests(self, request_ids: List[str]):
+    def abort_requests(self, request_ids: list[str]):
        """Abort requests from the scheduler."""

        # TODO: The scheduler doesn't really need to know the
@@ -228,7 +228,7 @@ class EngineCore:
    def remove_lora(self, lora_id: int) -> bool:
        return self.model_executor.remove_lora(lora_id)

-    def list_loras(self) -> Set[int]:
+    def list_loras(self) -> set[int]:
        return self.model_executor.list_loras()

    def pin_lora(self, lora_id: int) -> bool:
@@ -244,7 +244,7 @@ class EngineCoreProc(EngineCore):
        output_path: str,
        ready_pipe: Connection,
        vllm_config: VllmConfig,
-        executor_class: Type[Executor],
+        executor_class: type[Executor],
        log_stats: bool,
    ):
        super().__init__(vllm_config, executor_class, log_stats)
@@ -254,7 +254,7 @@ class EngineCoreProc(EngineCore):
        # and to overlap some serialization/deserialization with the
        # model forward pass.
        # Threads handle Socket <-> Queues and core_busy_loop uses Queue.
-        self.input_queue: queue.Queue[Tuple[EngineCoreRequestType,
+        self.input_queue: queue.Queue[tuple[EngineCoreRequestType,
                                            Any]] = queue.Queue()
        self.output_queue: queue.Queue[EngineCoreOutputs] = queue.Queue()
        threading.Thread(target=self.process_input_socket,