Update deprecated type hinting in platform, plugins, triton_utils, vllm_flash_attn (#18129)

Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
This commit is contained in:
Harry Mellor
2025-05-14 13:28:16 +01:00
committed by GitHub
parent dc372b9c8a
commit c8ea982d9b
6 changed files with 18 additions and 24 deletions

View File

@@ -2,7 +2,7 @@
import os
from functools import cache, lru_cache, wraps
from typing import TYPE_CHECKING, Dict, List, Optional
from typing import TYPE_CHECKING, Optional
import torch
@@ -35,7 +35,7 @@ except ImportError as e:
logger.warning("Failed to import from vllm._rocm_C with %r", e)
# Models not supported by ROCm.
_ROCM_UNSUPPORTED_MODELS: List[str] = []
_ROCM_UNSUPPORTED_MODELS: list[str] = []
# Models partially supported by ROCm.
# Architecture -> Reason.
@@ -43,7 +43,7 @@ _ROCM_SWA_REASON = ("Sliding window attention (SWA) is not yet supported in "
"Triton flash attention. For half-precision SWA support, "
"please use CK flash attention by setting "
"`VLLM_USE_TRITON_FLASH_ATTN=0`")
_ROCM_PARTIALLY_SUPPORTED_MODELS: Dict[str, str] = {
_ROCM_PARTIALLY_SUPPORTED_MODELS: dict[str, str] = {
"Qwen2ForCausalLM":
_ROCM_SWA_REASON,
"MistralForCausalLM":
@@ -58,7 +58,7 @@ _ROCM_PARTIALLY_SUPPORTED_MODELS: Dict[str, str] = {
"excessive use of shared memory. If this happens, disable Triton FA "
"by setting `VLLM_USE_TRITON_FLASH_ATTN=0`")
}
_ROCM_DEVICE_ID_NAME_MAP: Dict[str, str] = {
_ROCM_DEVICE_ID_NAME_MAP: dict[str, str] = {
"0x74a0": "AMD_Instinct_MI300A",
"0x74a1": "AMD_Instinct_MI300X",
"0x74b5": "AMD_Instinct_MI300X", # MI300X VF
@@ -203,7 +203,7 @@ class RocmPlatform(Platform):
@staticmethod
@with_amdsmi_context
def is_fully_connected(physical_device_ids: List[int]) -> bool:
def is_fully_connected(physical_device_ids: list[int]) -> bool:
"""
Query if the set of gpus are fully connected by xgmi (1 hop)
"""