[CI/Build] Avoid CUDA initialization (#8534)
--- a/vllm/platforms/cpu.py
+++ b/vllm/platforms/cpu.py
@@ -6,10 +6,10 @@ from .interface import Platform, PlatformEnum
 class CpuPlatform(Platform):
     _enum = PlatformEnum.CPU

-    @staticmethod
-    def get_device_name(device_id: int = 0) -> str:
+    @classmethod
+    def get_device_name(cls, device_id: int = 0) -> str:
         return "cpu"

-    @staticmethod
-    def inference_mode():
+    @classmethod
+    def inference_mode(cls):
         return torch.no_grad()
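The hunk above shows the pattern this commit applies across every platform backend: `@staticmethod` hooks become `@classmethod` hooks, so calls dispatch through `cls` and the methods keep one consistent signature that subclasses can override. A minimal sketch of that dispatch, with illustrative class names that are not part of the diff:

# Minimal sketch: with @classmethod the hook receives `cls`, so a
# subclass override is picked up even when dispatching through the
# class object. DemoCpuPlatform is illustrative only.
class BasePlatform:
    @classmethod
    def get_device_name(cls, device_id: int = 0) -> str:
        raise NotImplementedError


class DemoCpuPlatform(BasePlatform):
    @classmethod
    def get_device_name(cls, device_id: int = 0) -> str:
        return "cpu"


assert DemoCpuPlatform.get_device_name() == "cpu"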
--- a/vllm/platforms/cuda.py
+++ b/vllm/platforms/cuda.py
@@ -11,7 +11,7 @@ from typing_extensions import ParamSpec

 from vllm.logger import init_logger

-from .interface import Platform, PlatformEnum
+from .interface import DeviceCapability, Platform, PlatformEnum

 logger = init_logger(__name__)
@@ -96,19 +96,20 @@ def device_id_to_physical_device_id(device_id: int) -> int:
 class CudaPlatform(Platform):
     _enum = PlatformEnum.CUDA

-    @staticmethod
-    def get_device_capability(device_id: int = 0) -> Tuple[int, int]:
+    @classmethod
+    def get_device_capability(cls, device_id: int = 0) -> DeviceCapability:
         physical_device_id = device_id_to_physical_device_id(device_id)
-        return get_physical_device_capability(physical_device_id)
+        major, minor = get_physical_device_capability(physical_device_id)
+        return DeviceCapability(major=major, minor=minor)

-    @staticmethod
-    def get_device_name(device_id: int = 0) -> str:
+    @classmethod
+    def get_device_name(cls, device_id: int = 0) -> str:
         physical_device_id = device_id_to_physical_device_id(device_id)
         return get_physical_device_name(physical_device_id)

-    @staticmethod
+    @classmethod
     @with_nvml_context
-    def is_full_nvlink(physical_device_ids: List[int]) -> bool:
+    def is_full_nvlink(cls, physical_device_ids: List[int]) -> bool:
         """
         query if the set of gpus are fully connected by nvlink (1 hop)
         """
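For context on the commit title: `get_physical_device_capability` resolves the capability through NVML rather than `torch.cuda`, which is what lets this query run without initializing a CUDA context. A rough sketch of such a helper using `pynvml` (an assumption for illustration, not the actual helper body from this commit):

# Assumption / illustration only: NVML reads the compute capability
# without creating a CUDA context, unlike torch.cuda.get_device_capability().
from typing import Tuple

import pynvml


def nvml_device_capability(device_id: int = 0) -> Tuple[int, int]:
    pynvml.nvmlInit()
    try:
        handle = pynvml.nvmlDeviceGetHandleByIndex(device_id)
        major, minor = pynvml.nvmlDeviceGetCudaComputeCapability(handle)
        return major, minor
    finally:
        pynvml.nvmlShutdown()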
--- a/vllm/platforms/interface.py
+++ b/vllm/platforms/interface.py
@@ -1,5 +1,5 @@
 import enum
-from typing import Optional, Tuple
+from typing import NamedTuple, Optional, Tuple, Union

 import torch
@@ -12,6 +12,23 @@ class PlatformEnum(enum.Enum):
     UNSPECIFIED = enum.auto()


+class DeviceCapability(NamedTuple):
+    major: int
+    minor: int
+
+    def as_version_str(self) -> str:
+        return f"{self.major}.{self.minor}"
+
+    def to_int(self) -> int:
+        """
+        Express device capability as an integer ``<major><minor>``.
+
+        It is assumed that the minor version is always a single digit.
+        """
+        assert 0 <= self.minor < 10
+        return self.major * 10 + self.minor
+
+
 class Platform:
     _enum: PlatformEnum
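`DeviceCapability` behaves like the `(major, minor)` tuple it replaces while adding two formatting helpers; a quick illustration with arbitrary values:

# Arbitrary values, for illustration.
cap = DeviceCapability(major=8, minor=6)
assert cap.as_version_str() == "8.6"
assert cap.to_int() == 86
# A NamedTuple keeps plain tuple semantics, so ordering comparisons
# against (major, minor) tuples still work:
assert cap >= (8, 0)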
@@ -27,16 +44,47 @@ class Platform:
     def is_cpu(self) -> bool:
         return self._enum == PlatformEnum.CPU

-    @staticmethod
-    def get_device_capability(device_id: int = 0) -> Optional[Tuple[int, int]]:
+    def is_cuda_alike(self) -> bool:
+        """Stateless version of :func:`torch.cuda.is_available`."""
+        return self._enum in (PlatformEnum.CUDA, PlatformEnum.ROCM)
+
+    @classmethod
+    def get_device_capability(
+        cls,
+        device_id: int = 0,
+    ) -> Optional[DeviceCapability]:
         """Stateless version of :func:`torch.cuda.get_device_capability`."""
         return None

-    @staticmethod
-    def get_device_name(device_id: int = 0) -> str:
+    @classmethod
+    def has_device_capability(
+        cls,
+        capability: Union[Tuple[int, int], int],
+        device_id: int = 0,
+    ) -> bool:
+        """
+        Test whether this platform is compatible with a device capability.
+
+        The ``capability`` argument can either be:
+
+        - A tuple ``(major, minor)``.
+        - An integer ``<major><minor>``. (See :meth:`DeviceCapability.to_int`)
+        """
+        current_capability = cls.get_device_capability(device_id=device_id)
+        if current_capability is None:
+            return False
+
+        if isinstance(capability, tuple):
+            return current_capability >= capability
+
+        return current_capability.to_int() >= capability
+
+    @classmethod
+    def get_device_name(cls, device_id: int = 0) -> str:
         raise NotImplementedError

-    @staticmethod
-    def inference_mode():
+    @classmethod
+    def inference_mode(cls):
         """A device-specific wrapper of `torch.inference_mode`.

         This wrapper is recommended because some hardware backends such as TPU
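`has_device_capability` accepts either of the two forms documented in its docstring; a usage sketch (run against a concrete platform such as `CudaPlatform`, and assuming the device is actually visible on the machine):

# Both calls ask "is the device at least SM 8.0?". On the base
# Platform, get_device_capability() returns None, so the check
# degrades gracefully to False.
if CudaPlatform.has_device_capability((8, 0)):  # tuple form
    print("Ampere-or-newer kernels are usable")
if CudaPlatform.has_device_capability(80):      # integer <major><minor> form
    print("same check, integer form")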
--- a/vllm/platforms/rocm.py
+++ b/vllm/platforms/rocm.py
@@ -1,12 +1,11 @@
 import os
 from functools import lru_cache
-from typing import Tuple

 import torch

 from vllm.logger import init_logger

-from .interface import Platform, PlatformEnum
+from .interface import DeviceCapability, Platform, PlatformEnum

 logger = init_logger(__name__)
@@ -20,12 +19,13 @@ if os.environ.get("VLLM_WORKER_MULTIPROC_METHOD", None) in ["fork", None]:
 class RocmPlatform(Platform):
     _enum = PlatformEnum.ROCM

-    @staticmethod
+    @classmethod
     @lru_cache(maxsize=8)
-    def get_device_capability(device_id: int = 0) -> Tuple[int, int]:
-        return torch.cuda.get_device_capability(device_id)
+    def get_device_capability(cls, device_id: int = 0) -> DeviceCapability:
+        major, minor = torch.cuda.get_device_capability(device_id)
+        return DeviceCapability(major=major, minor=minor)

-    @staticmethod
+    @classmethod
     @lru_cache(maxsize=8)
-    def get_device_name(device_id: int = 0) -> str:
+    def get_device_name(cls, device_id: int = 0) -> str:
         return torch.cuda.get_device_name(device_id)
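Note the decorator order this hunk preserves: `@classmethod` stays outermost, so `lru_cache` wraps the underlying function and the cache is keyed on `(cls, device_id)`. A standalone sketch of that stacking, with an illustrative class name:

# @classmethod outermost, @lru_cache inside: the cached function still
# receives `cls`, so repeated calls with the same device_id hit the cache.
from functools import lru_cache


class DemoRocmPlatform:
    @classmethod
    @lru_cache(maxsize=8)
    def get_device_name(cls, device_id: int = 0) -> str:
        print("cache miss")  # runs once per (cls, device_id)
        return f"device-{device_id}"


DemoRocmPlatform.get_device_name(0)  # prints "cache miss"
DemoRocmPlatform.get_device_name(0)  # served from the cache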
--- a/vllm/platforms/tpu.py
+++ b/vllm/platforms/tpu.py
@@ -6,6 +6,10 @@ from .interface import Platform, PlatformEnum
 class TpuPlatform(Platform):
     _enum = PlatformEnum.TPU

-    @staticmethod
-    def inference_mode():
+    @classmethod
+    def get_device_name(cls, device_id: int = 0) -> str:
+        raise NotImplementedError
+
+    @classmethod
+    def inference_mode(cls):
         return torch.no_grad()
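Call sites are expected to enter inference through this wrapper rather than `torch.inference_mode` directly, so the TPU override to `torch.no_grad` takes effect transparently. A hedged usage sketch, assuming the `current_platform` accessor exported by `vllm.platforms`; `model` and `tokens` are placeholders, not objects from this commit:

# The platform supplies the appropriate no-grad context manager
# (torch.inference_mode on CUDA, torch.no_grad here on TPU).
from vllm.platforms import current_platform

with current_platform.inference_mode():
    output = model(tokens)  # placeholder model call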