Update deprecated Python 3.8 typing (#13971)
@@ -7,7 +7,7 @@ Requires HuggingFace credentials for access.
 """
 
 import gc
-from typing import List, Optional, Tuple
+from typing import Optional
 
 import torch
 from huggingface_hub import snapshot_download
@@ -18,7 +18,7 @@ from vllm.lora.request import LoRARequest
 
 def create_test_prompts(
         lora_path: str
-) -> List[Tuple[str, SamplingParams, Optional[LoRARequest]]]:
+) -> list[tuple[str, SamplingParams, Optional[LoRARequest]]]:
     return [
         # this is an example of using quantization without LoRA
         ("My name is",
@@ -49,7 +49,7 @@ def create_test_prompts(
 
 
 def process_requests(engine: LLMEngine,
-                     test_prompts: List[Tuple[str, SamplingParams,
+                     test_prompts: list[tuple[str, SamplingParams,
                                               Optional[LoRARequest]]]):
     """Continuously process a list of prompts and handle the outputs."""
     request_id = 0
@@ -63,7 +63,7 @@ def process_requests(engine: LLMEngine,
                                lora_request=lora_request)
         request_id += 1
 
-        request_outputs: List[RequestOutput] = engine.step()
+        request_outputs: list[RequestOutput] = engine.step()
         for request_output in request_outputs:
             if request_output.finished:
                 print("----------------------------------------------------")
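Note on the pattern applied throughout this commit: the deprecated typing.List / typing.Tuple aliases are replaced by the built-in list / tuple generics (PEP 585), which are subscriptable in annotations on Python 3.9 and newer. A minimal standalone sketch of the same annotation style, separate from the example script above (the make_pairs helper is hypothetical and used only for illustration):

from typing import Optional

# Built-in generics (PEP 585) replace typing.List / typing.Tuple on Python 3.9+.
# make_pairs is a hypothetical helper, not part of this commit.
def make_pairs(names: list[str]) -> list[tuple[str, Optional[int]]]:
    # Pair each name with its length, or None for empty strings.
    return [(name, len(name) or None) for name in names]

print(make_pairs(["alpha", "", "beta"]))  # [('alpha', 5), ('', None), ('beta', 4)]

Optional itself is not deprecated by PEP 585, so it can stay imported from typing, as the diff above does.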