Update deprecated type hinting in platform, plugins, triton_utils, vllm_flash_attn (#18129)

Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
2025-05-14 13:28:16 +01:00
parent dc372b9c8a
commit c8ea982d9b
6 changed files with 18 additions and 24 deletions
--- a/vllm/platforms/rocm.py
+++ b/vllm/platforms/rocm.py
@@ -2,7 +2,7 @@

 import os
 from functools import cache, lru_cache, wraps
-from typing import TYPE_CHECKING, Dict, List, Optional
+from typing import TYPE_CHECKING, Optional

 import torch

@@ -35,7 +35,7 @@ except ImportError as e:
    logger.warning("Failed to import from vllm._rocm_C with %r", e)

 # Models not supported by ROCm.
-_ROCM_UNSUPPORTED_MODELS: List[str] = []
+_ROCM_UNSUPPORTED_MODELS: list[str] = []

 # Models partially supported by ROCm.
 # Architecture -> Reason.
@@ -43,7 +43,7 @@ _ROCM_SWA_REASON = ("Sliding window attention (SWA) is not yet supported in "
                    "Triton flash attention. For half-precision SWA support, "
                    "please use CK flash attention by setting "
                    "`VLLM_USE_TRITON_FLASH_ATTN=0`")
-_ROCM_PARTIALLY_SUPPORTED_MODELS: Dict[str, str] = {
+_ROCM_PARTIALLY_SUPPORTED_MODELS: dict[str, str] = {
    "Qwen2ForCausalLM":
    _ROCM_SWA_REASON,
    "MistralForCausalLM":
@@ -58,7 +58,7 @@ _ROCM_PARTIALLY_SUPPORTED_MODELS: Dict[str, str] = {
     "excessive use of shared memory. If this happens, disable Triton FA "
     "by setting `VLLM_USE_TRITON_FLASH_ATTN=0`")
 }
-_ROCM_DEVICE_ID_NAME_MAP: Dict[str, str] = {
+_ROCM_DEVICE_ID_NAME_MAP: dict[str, str] = {
    "0x74a0": "AMD_Instinct_MI300A",
    "0x74a1": "AMD_Instinct_MI300X",
    "0x74b5": "AMD_Instinct_MI300X",  # MI300X VF
@@ -203,7 +203,7 @@ class RocmPlatform(Platform):

    @staticmethod
    @with_amdsmi_context
-    def is_fully_connected(physical_device_ids: List[int]) -> bool:
+    def is_fully_connected(physical_device_ids: list[int]) -> bool:
        """
        Query if the set of gpus are fully connected by xgmi (1 hop)
        """