Files
vllm/vllm/platforms/interface.py

135 lines
3.7 KiB
Python
Raw Normal View History

import enum
import random
from typing import NamedTuple, Optional, Tuple, Union
import numpy as np
import torch
class PlatformEnum(enum.Enum):
    """Identifiers for the hardware platforms this module distinguishes.

    Member values are assigned by :func:`enum.auto`, so they follow the
    declaration order below; keep the order stable if the numeric values
    are ever relied upon.
    """

    CUDA = enum.auto()
    ROCM = enum.auto()
    TPU = enum.auto()
    HPU = enum.auto()  # Intel Gaudi
    XPU = enum.auto()
    CPU = enum.auto()
    NEURON = enum.auto()
    OPENVINO = enum.auto()
    # Used by ``UnspecifiedPlatform`` when no concrete platform applies.
    UNSPECIFIED = enum.auto()
class DeviceCapability(NamedTuple):
    """Device capability version expressed as a ``(major, minor)`` pair.

    Being a ``NamedTuple``, instances compare element-wise against plain
    ``(major, minor)`` tuples.
    """

    major: int
    minor: int

    def as_version_str(self) -> str:
        """Return the capability formatted as ``"<major>.<minor>"``."""
        return "{}.{}".format(self.major, self.minor)

    def to_int(self) -> int:
        """
        Express device capability as an integer ``<major><minor>``.

        The minor version is assumed to occupy exactly one decimal digit;
        this is enforced with an assertion before the value is packed.
        """
        major, minor = self
        assert 0 <= minor < 10
        return major * 10 + minor
class Platform:
    """Base description of a hardware platform.

    A concrete platform is expected to assign ``_enum`` (this base class
    only annotates it) and to override the device-query classmethods whose
    defaults here return ``None`` or raise ``NotImplementedError``.
    """

    # Identifies which platform this object represents; read by every
    # ``is_*`` predicate below.
    _enum: PlatformEnum

    def is_cuda(self) -> bool:
        """Return whether this platform is ``PlatformEnum.CUDA``."""
        return self._enum == PlatformEnum.CUDA

    def is_rocm(self) -> bool:
        """Return whether this platform is ``PlatformEnum.ROCM``."""
        return self._enum == PlatformEnum.ROCM

    def is_tpu(self) -> bool:
        """Return whether this platform is ``PlatformEnum.TPU``."""
        return self._enum == PlatformEnum.TPU

    def is_hpu(self) -> bool:
        """Return whether this platform is ``PlatformEnum.HPU``."""
        return self._enum == PlatformEnum.HPU

    def is_xpu(self) -> bool:
        """Return whether this platform is ``PlatformEnum.XPU``."""
        return self._enum == PlatformEnum.XPU

    def is_cpu(self) -> bool:
        """Return whether this platform is ``PlatformEnum.CPU``."""
        return self._enum == PlatformEnum.CPU

    def is_neuron(self) -> bool:
        """Return whether this platform is ``PlatformEnum.NEURON``."""
        return self._enum == PlatformEnum.NEURON

    def is_openvino(self) -> bool:
        """Return whether this platform is ``PlatformEnum.OPENVINO``."""
        return self._enum == PlatformEnum.OPENVINO

    def is_cuda_alike(self) -> bool:
        """Stateless version of :func:`torch.cuda.is_available`."""
        return self._enum in (PlatformEnum.CUDA, PlatformEnum.ROCM)

    @classmethod
    def get_device_capability(
        cls,
        device_id: int = 0,
    ) -> Optional[DeviceCapability]:
        """Stateless version of :func:`torch.cuda.get_device_capability`.

        The base implementation reports no capability by returning ``None``.
        """
        return None

    @classmethod
    def has_device_capability(
        cls,
        capability: Union[Tuple[int, int], int],
        device_id: int = 0,
    ) -> bool:
        """
        Test whether this platform is compatible with a device capability.

        The ``capability`` argument can either be:

        - A tuple ``(major, minor)``.
        - An integer ``<major><minor>``. (See :meth:`DeviceCapability.to_int`)

        Returns ``False`` when :meth:`get_device_capability` returns ``None``.
        """
        current_capability = cls.get_device_capability(device_id=device_id)
        if current_capability is None:
            return False

        if isinstance(capability, tuple):
            # DeviceCapability is a NamedTuple, so this is an element-wise
            # (major first, then minor) tuple comparison.
            return current_capability >= capability

        return current_capability.to_int() >= capability

    @classmethod
    def get_device_name(cls, device_id: int = 0) -> str:
        """Get the name of a device.

        Raises:
            NotImplementedError: always, in this base implementation.
        """
        raise NotImplementedError

    @classmethod
    def get_device_total_memory(cls, device_id: int = 0) -> int:
        """Get the total memory of a device in bytes.

        Raises:
            NotImplementedError: always, in this base implementation.
        """
        raise NotImplementedError

    @classmethod
    def inference_mode(cls):
        """A device-specific wrapper of `torch.inference_mode`.

        This wrapper is recommended because some hardware backends such as TPU
        do not support `torch.inference_mode`. In such a case, they will fall
        back to `torch.no_grad` by overriding this method.
        """
        return torch.inference_mode(mode=True)

    @classmethod
    def seed_everything(cls, seed: int) -> None:
        """
        Set the seed of each random module.

        Seeds Python's ``random``, NumPy's global RNG and PyTorch.
        `torch.manual_seed` will set seed on all devices.

        Loosely based on: https://github.com/Lightning-AI/pytorch-lightning/blob/2.4.0/src/lightning/fabric/utilities/seed.py#L20
        """
        random.seed(seed)
        np.random.seed(seed)
        torch.manual_seed(seed)
class UnspecifiedPlatform(Platform):
    """Platform whose ``_enum`` is ``PlatformEnum.UNSPECIFIED``.

    It overrides nothing else, so every other behaviour comes from
    :class:`Platform`.
    """

    _enum = PlatformEnum.UNSPECIFIED