Update deprecated Python 3.8 typing (#13971)

This commit is contained in:
Harry Mellor
2025-03-03 01:34:51 +00:00
committed by GitHub
parent bf33700ecd
commit cf069aa8aa
300 changed files with 2294 additions and 2347 deletions

View File

@@ -7,7 +7,7 @@ import time
from concurrent.futures import Future
from inspect import isclass, signature
from multiprocessing.connection import Connection
-from typing import Any, List, Optional, Set, Tuple, Type
+from typing import Any, Optional
import msgspec
import psutil
@@ -42,7 +42,7 @@ class EngineCore:
def __init__(
self,
vllm_config: VllmConfig,
-executor_class: Type[Executor],
+executor_class: type[Executor],
log_stats: bool,
):
assert vllm_config.model_config.runner_type != "pooling"
@@ -80,7 +80,7 @@ class EngineCore:
# schedule and execute batches, and is required by pipeline parallelism
# to eliminate pipeline bubbles.
self.batch_queue_size = self.model_executor.max_concurrent_batches
-self.batch_queue: Optional[queue.Queue[Tuple[Future[ModelRunnerOutput],
+self.batch_queue: Optional[queue.Queue[tuple[Future[ModelRunnerOutput],
SchedulerOutput]]] = None
if self.batch_queue_size > 1:
logger.info("Batch queue is enabled with size %d",
@@ -88,7 +88,7 @@ class EngineCore:
self.batch_queue = queue.Queue(self.batch_queue_size)
def _initialize_kv_caches(self,
-vllm_config: VllmConfig) -> Tuple[int, int]:
+vllm_config: VllmConfig) -> tuple[int, int]:
start = time.time()
# Get all kv cache needed by the model
@@ -134,7 +134,7 @@ class EngineCore:
self.scheduler.add_request(req)
-def abort_requests(self, request_ids: List[str]):
+def abort_requests(self, request_ids: list[str]):
"""Abort requests from the scheduler."""
# TODO: The scheduler doesn't really need to know the
@@ -228,7 +228,7 @@ class EngineCore:
def remove_lora(self, lora_id: int) -> bool:
return self.model_executor.remove_lora(lora_id)
-def list_loras(self) -> Set[int]:
+def list_loras(self) -> set[int]:
return self.model_executor.list_loras()
def pin_lora(self, lora_id: int) -> bool:
@@ -244,7 +244,7 @@ class EngineCoreProc(EngineCore):
output_path: str,
ready_pipe: Connection,
vllm_config: VllmConfig,
-executor_class: Type[Executor],
+executor_class: type[Executor],
log_stats: bool,
):
super().__init__(vllm_config, executor_class, log_stats)
@@ -254,7 +254,7 @@ class EngineCoreProc(EngineCore):
# and to overlap some serialization/deserialization with the
# model forward pass.
# Threads handle Socket <-> Queues and core_busy_loop uses Queue.
-self.input_queue: queue.Queue[Tuple[EngineCoreRequestType,
+self.input_queue: queue.Queue[tuple[EngineCoreRequestType,
Any]] = queue.Queue()
self.output_queue: queue.Queue[EngineCoreOutputs] = queue.Queue()
threading.Thread(target=self.process_input_socket,