[CI/Build] Avoid CUDA initialization (#8534)

This commit is contained in:
Cyrus Leung
2024-09-18 18:38:11 +08:00
committed by GitHub
parent e351572900
commit 6ffa3f314c
55 changed files with 256 additions and 256 deletions

View File

@@ -6,10 +6,10 @@ from .interface import Platform, PlatformEnum
class CpuPlatform(Platform):
_enum = PlatformEnum.CPU
@staticmethod
def get_device_name(device_id: int = 0) -> str:
@classmethod
def get_device_name(cls, device_id: int = 0) -> str:
return "cpu"
@staticmethod
def inference_mode():
@classmethod
def inference_mode(cls):
return torch.no_grad()

View File

@@ -11,7 +11,7 @@ from typing_extensions import ParamSpec
from vllm.logger import init_logger
from .interface import Platform, PlatformEnum
from .interface import DeviceCapability, Platform, PlatformEnum
logger = init_logger(__name__)
@@ -96,19 +96,20 @@ def device_id_to_physical_device_id(device_id: int) -> int:
class CudaPlatform(Platform):
_enum = PlatformEnum.CUDA
@staticmethod
def get_device_capability(device_id: int = 0) -> Tuple[int, int]:
@classmethod
def get_device_capability(cls, device_id: int = 0) -> DeviceCapability:
physical_device_id = device_id_to_physical_device_id(device_id)
return get_physical_device_capability(physical_device_id)
major, minor = get_physical_device_capability(physical_device_id)
return DeviceCapability(major=major, minor=minor)
@staticmethod
def get_device_name(device_id: int = 0) -> str:
@classmethod
def get_device_name(cls, device_id: int = 0) -> str:
physical_device_id = device_id_to_physical_device_id(device_id)
return get_physical_device_name(physical_device_id)
@staticmethod
@classmethod
@with_nvml_context
def is_full_nvlink(physical_device_ids: List[int]) -> bool:
def is_full_nvlink(cls, physical_device_ids: List[int]) -> bool:
"""
query if the set of gpus are fully connected by nvlink (1 hop)
"""

View File

@@ -1,5 +1,5 @@
import enum
from typing import Optional, Tuple
from typing import NamedTuple, Optional, Tuple, Union
import torch
@@ -12,6 +12,23 @@ class PlatformEnum(enum.Enum):
UNSPECIFIED = enum.auto()
class DeviceCapability(NamedTuple):
    """Compute capability of a device as a ``(major, minor)`` pair.

    Being a named tuple, an instance compares element-wise against plain
    ``(major, minor)`` tuples.
    """

    major: int
    minor: int

    def as_version_str(self) -> str:
        """Render the capability in dotted form: ``<major>.<minor>``."""
        major, minor = self
        return f"{major}.{minor}"

    def to_int(self) -> int:
        """
        Express device capability as an integer ``<major><minor>``.

        It is assumed that the minor version is always a single digit.
        """
        major, minor = self
        # A two-digit minor would bleed into the major part of the result.
        assert 0 <= minor < 10
        return 10 * major + minor
class Platform:
_enum: PlatformEnum
@@ -27,16 +44,47 @@ class Platform:
def is_cpu(self) -> bool:
return self._enum == PlatformEnum.CPU
@staticmethod
def get_device_capability(device_id: int = 0) -> Optional[Tuple[int, int]]:
def is_cuda_alike(self) -> bool:
    """Stateless version of :func:`torch.cuda.is_available`.

    True when this platform's enum is CUDA or ROCM; decided purely from
    ``_enum``.
    """
    platform = self._enum
    return platform == PlatformEnum.CUDA or platform == PlatformEnum.ROCM
@classmethod
def get_device_capability(
        cls, device_id: int = 0) -> Optional[DeviceCapability]:
    """Stateless version of :func:`torch.cuda.get_device_capability`.

    This base implementation reports no capability and always returns
    ``None``, whatever ``device_id`` is given.
    """
    return None
@staticmethod
def get_device_name(device_id: int = 0) -> str:
@classmethod
def has_device_capability(
        cls,
        capability: Union[Tuple[int, int], int],
        device_id: int = 0,
) -> bool:
    """
    Test whether this platform is compatible with a device capability.

    The ``capability`` argument can either be:

    - A tuple ``(major, minor)``.
    - An integer ``<major><minor>``. (See :meth:`DeviceCapability.to_int`)

    Returns ``False`` when :meth:`get_device_capability` reports ``None``
    for ``device_id``; otherwise returns whether the device's capability is
    greater than or equal to the requested one.
    """
    device_cap = cls.get_device_capability(device_id=device_id)
    if device_cap is None:
        return False

    # Compare like with like: tuple against tuple, or int against int.
    if isinstance(capability, tuple):
        available = device_cap
    else:
        available = device_cap.to_int()
    return available >= capability
@classmethod
def get_device_name(cls, device_id: int = 0) -> str:
raise NotImplementedError
@staticmethod
def inference_mode():
@classmethod
def inference_mode(cls):
"""A device-specific wrapper of `torch.inference_mode`.
This wrapper is recommended because some hardware backends such as TPU

View File

@@ -1,12 +1,11 @@
import os
from functools import lru_cache
from typing import Tuple
import torch
from vllm.logger import init_logger
from .interface import Platform, PlatformEnum
from .interface import DeviceCapability, Platform, PlatformEnum
logger = init_logger(__name__)
@@ -20,12 +19,13 @@ if os.environ.get("VLLM_WORKER_MULTIPROC_METHOD", None) in ["fork", None]:
class RocmPlatform(Platform):
_enum = PlatformEnum.ROCM
@staticmethod
@classmethod
@lru_cache(maxsize=8)
def get_device_capability(device_id: int = 0) -> Tuple[int, int]:
return torch.cuda.get_device_capability(device_id)
def get_device_capability(cls, device_id: int = 0) -> DeviceCapability:
major, minor = torch.cuda.get_device_capability(device_id)
return DeviceCapability(major=major, minor=minor)
@staticmethod
@classmethod
@lru_cache(maxsize=8)
def get_device_name(device_id: int = 0) -> str:
def get_device_name(cls, device_id: int = 0) -> str:
return torch.cuda.get_device_name(device_id)

View File

@@ -6,6 +6,10 @@ from .interface import Platform, PlatformEnum
class TpuPlatform(Platform):
_enum = PlatformEnum.TPU
@staticmethod
def inference_mode():
@classmethod
def get_device_name(cls, device_id: int = 0) -> str:
raise NotImplementedError
@classmethod
def inference_mode(cls):
return torch.no_grad()