vllm/platforms/interface.py

import enum
import random
from typing import NamedTuple, Optional, Tuple, Union

import numpy as np
import torch


class PlatformEnum(enum.Enum):
    """Identifiers for the platforms this module can distinguish.

    Every value is produced by ``enum.auto()``, so a member's value follows
    from its position in this list; inserting or reordering members changes
    the values of the members after it.
    """
    # Each member below has a matching ``Platform.is_<name>()`` predicate
    # that compares ``Platform._enum`` against it.
    CUDA = enum.auto()
    ROCM = enum.auto()
    TPU = enum.auto()
    HPU = enum.auto()
    XPU = enum.auto()
    CPU = enum.auto()
    NEURON = enum.auto()
    OPENVINO = enum.auto()
    # Used by ``UnspecifiedPlatform``; no ``is_*`` predicate tests for it.
    UNSPECIFIED = enum.auto()


class DeviceCapability(NamedTuple):
    """Device capability expressed as a ``(major, minor)`` version pair.

    Being a named tuple, instances compare element-wise against plain
    ``(major, minor)`` tuples.
    """

    major: int
    minor: int

    def as_version_str(self) -> str:
        """Return the capability formatted as ``"<major>.<minor>"``."""
        return f"{self.major}.{self.minor}"

    def to_int(self) -> int:
        """
        Express device capability as an integer ``<major><minor>``.

        It is assumed that the minor version is always a single digit.

        Raises:
            AssertionError: If ``minor`` is not in the range ``0..9``, since
                the resulting integer would then be ambiguous (for example
                ``(8, 10)`` and ``(9, 0)`` would both map to ``90``).
        """
        if not 0 <= self.minor < 10:
            # Raised explicitly instead of using an ``assert`` statement so
            # the check is not stripped when Python runs with ``-O``; the
            # exception type stays the same as before.
            raise AssertionError(
                "minor version must be a single digit to be encoded as an "
                f"integer, got {self.minor}")
        return self.major * 10 + self.minor


class Platform:
    """Base class describing the platform the code is running on.

    A subclass sets ``_enum`` to its :class:`PlatformEnum` member and
    overrides the device query methods it can answer.
    """

    _enum: PlatformEnum

    def is_cuda(self) -> bool:
        """Return ``True`` when this platform is ``PlatformEnum.CUDA``."""
        return self._enum is PlatformEnum.CUDA

    def is_rocm(self) -> bool:
        """Return ``True`` when this platform is ``PlatformEnum.ROCM``."""
        return self._enum is PlatformEnum.ROCM

    def is_tpu(self) -> bool:
        """Return ``True`` when this platform is ``PlatformEnum.TPU``."""
        return self._enum is PlatformEnum.TPU

    def is_hpu(self) -> bool:
        """Return ``True`` when this platform is ``PlatformEnum.HPU``."""
        return self._enum is PlatformEnum.HPU

    def is_xpu(self) -> bool:
        """Return ``True`` when this platform is ``PlatformEnum.XPU``."""
        return self._enum is PlatformEnum.XPU

    def is_cpu(self) -> bool:
        """Return ``True`` when this platform is ``PlatformEnum.CPU``."""
        return self._enum is PlatformEnum.CPU

    def is_neuron(self) -> bool:
        """Return ``True`` when this platform is ``PlatformEnum.NEURON``."""
        return self._enum is PlatformEnum.NEURON

    def is_openvino(self) -> bool:
        """Return ``True`` when this platform is ``PlatformEnum.OPENVINO``."""
        return self._enum is PlatformEnum.OPENVINO

    def is_cuda_alike(self) -> bool:
        """Stateless version of :func:`torch.cuda.is_available`.

        ``True`` for both the CUDA and the ROCm platform.
        """
        platform = self._enum
        return platform is PlatformEnum.CUDA or platform is PlatformEnum.ROCM

    @classmethod
    def get_device_capability(
        cls,
        device_id: int = 0,
    ) -> Optional[DeviceCapability]:
        """Stateless version of :func:`torch.cuda.get_device_capability`.

        The base implementation reports no capability by returning ``None``.
        """
        return None

    @classmethod
    def has_device_capability(
        cls,
        capability: Union[Tuple[int, int], int],
        device_id: int = 0,
    ) -> bool:
        """
        Check that the device reaches at least the requested capability.

        ``capability`` is given in one of two forms:

        - A tuple ``(major, minor)``.
        - An integer ``<major><minor>``. (See :meth:`DeviceCapability.to_int`)

        Returns ``False`` when :meth:`get_device_capability` reports ``None``
        for ``device_id``.
        """
        detected = cls.get_device_capability(device_id=device_id)
        if detected is None:
            return False

        if not isinstance(capability, tuple):
            # Integer form: compare against the ``<major><minor>`` encoding.
            return detected.to_int() >= capability

        # Tuple form: ordinary element-wise tuple ordering.
        return detected >= capability

    @classmethod
    def get_device_name(cls, device_id: int = 0) -> str:
        """Return the name of the device; not implemented on the base class."""
        raise NotImplementedError

    @classmethod
    def get_device_total_memory(cls, device_id: int = 0) -> int:
        """Return the device's total memory in bytes; not implemented on the
        base class."""
        raise NotImplementedError

    @classmethod
    def inference_mode(cls):
        """Return the context manager to wrap inference code in.

        The base implementation hands back `torch.inference_mode`. Going
        through this method is recommended because some hardware backends
        such as TPU do not support `torch.inference_mode`; those override
        this method and fall back to `torch.no_grad`.
        """
        return torch.inference_mode(mode=True)

    @classmethod
    def seed_everything(cls, seed: int) -> None:
        """
        Seed Python's ``random``, NumPy and torch with the same value.
        `torch.manual_seed` will set seed on all devices.

        Loosely based on: https://github.com/Lightning-AI/pytorch-lightning/blob/2.4.0/src/lightning/fabric/utilities/seed.py#L20
        """
        # Applied in this fixed order: random, then NumPy, then torch.
        for apply_seed in (random.seed, np.random.seed, torch.manual_seed):
            apply_seed(seed)


class UnspecifiedPlatform(Platform):
    """Platform whose ``_enum`` is ``PlatformEnum.UNSPECIFIED``.

    Nothing else is overridden, so every ``is_*`` predicate inherited from
    ``Platform`` returns ``False`` and all other methods keep their base
    behaviour.
    """
    _enum = PlatformEnum.UNSPECIFIED
[hardware][misc] introduce platform abstraction (#6080) 2024-07-02 20:12:22 -07:00			`import enum`
[Hardware] using current_platform.seed_everything (#9785) Signed-off-by: wangshuai09 <391746016@qq.com> 2024-10-29 22:47:44 +08:00			`import random`
[CI/Build] Avoid CUDA initialization (#8534) 2024-09-18 18:38:11 +08:00			`from typing import NamedTuple, Optional, Tuple, Union`
[hardware][misc] introduce platform abstraction (#6080) 2024-07-02 20:12:22 -07:00
[Hardware] using current_platform.seed_everything (#9785) Signed-off-by: wangshuai09 <391746016@qq.com> 2024-10-29 22:47:44 +08:00			`import numpy as np`
[Misc] Add a wrapper for torch.inference_mode (#6618) 2024-07-21 18:43:11 -07:00			`import torch`

[hardware][misc] introduce platform abstraction (#6080) 2024-07-02 20:12:22 -07:00
			`class PlatformEnum(enum.Enum):`
			`CUDA = enum.auto()`
			`ROCM = enum.auto()`
[Misc] Add a wrapper for torch.inference_mode (#6618) 2024-07-21 18:43:11 -07:00			`TPU = enum.auto()`
[Hardware][Intel-Gaudi] Add Intel Gaudi (HPU) inference backend (#6143) Signed-off-by: yuwenzho <yuwen.zhou@intel.com> Signed-off-by: Chendi.Xue <chendi.xue@intel.com> Signed-off-by: Bob Zhu <bob.zhu@intel.com> Signed-off-by: zehao-intel <zehao.huang@intel.com> Signed-off-by: Konrad Zawora <kzawora@habana.ai> Co-authored-by: Kunshang Ji <kunshang.ji@intel.com> Co-authored-by: Sanju C Sudhakaran <scsudhakaran@habana.ai> Co-authored-by: Michal Adamczyk <madamczyk@habana.ai> Co-authored-by: Marceli Fylcek <mfylcek@habana.ai> Co-authored-by: Himangshu Lahkar <49579433+hlahkar@users.noreply.github.com> Co-authored-by: Vivek Goel <vgoel@habana.ai> Co-authored-by: yuwenzho <yuwen.zhou@intel.com> Co-authored-by: Dominika Olszewska <dolszewska@habana.ai> Co-authored-by: barak goldberg <149692267+bgoldberg-habana@users.noreply.github.com> Co-authored-by: Michal Szutenberg <37601244+szutenberg@users.noreply.github.com> Co-authored-by: Jan Kaniecki <jkaniecki@habana.ai> Co-authored-by: Agata Dobrzyniewicz <160237065+adobrzyniewicz-habana@users.noreply.github.com> Co-authored-by: Krzysztof Wisniewski <kwisniewski@habana.ai> Co-authored-by: Dudi Lester <160421192+dudilester@users.noreply.github.com> Co-authored-by: Ilia Taraban <tarabanil@gmail.com> Co-authored-by: Chendi.Xue <chendi.xue@intel.com> Co-authored-by: Michał Kuligowski <mkuligowski@habana.ai> Co-authored-by: Jakub Maksymczuk <jmaksymczuk@habana.ai> Co-authored-by: Tomasz Zielinski <85164140+tzielinski-habana@users.noreply.github.com> Co-authored-by: Sun Choi <schoi@habana.ai> Co-authored-by: Iryna Boiko <iboiko@habana.ai> Co-authored-by: Bob Zhu <41610754+czhu15@users.noreply.github.com> Co-authored-by: hlin99 <73271530+hlin99@users.noreply.github.com> Co-authored-by: Zehao Huang <zehao.huang@intel.com> Co-authored-by: Andrzej Kotłowski <Andrzej.Kotlowski@intel.com> Co-authored-by: Yan Tomsinsky <73292515+Yantom1@users.noreply.github.com> Co-authored-by: Nir David <ndavid@habana.ai> Co-authored-by: Yu-Zhou 
<yu.zhou@intel.com> Co-authored-by: Ruheena Suhani Shaik <rsshaik@habana.ai> Co-authored-by: Karol Damaszke <kdamaszke@habana.ai> Co-authored-by: Marcin Swiniarski <mswiniarski@habana.ai> Co-authored-by: Woosuk Kwon <woosuk.kwon@berkeley.edu> Co-authored-by: Jacek Czaja <jacek.czaja@intel.com> Co-authored-by: Jacek Czaja <jczaja@habana.ai> Co-authored-by: Yuan <yuan.zhou@outlook.com> 2024-11-06 10:09:10 +01:00			`HPU = enum.auto()`
[Bugfix][Intel] Fix XPU Dockerfile Build (#7824) Signed-off-by: tylertitsworth <tyler.titsworth@intel.com> Co-authored-by: youkaichao <youkaichao@126.com> 2024-09-27 23:45:50 -07:00			`XPU = enum.auto()`
[Hardware][Intel] Support compressed-tensor W8A8 for CPU backend (#7257) 2024-09-12 00:46:46 +08:00			`CPU = enum.auto()`
[Neuron] [Bugfix] Fix neuron startup (#9374) Co-authored-by: Jerzy Zagorski <jzagorsk@amazon.com> 2024-10-22 14:51:41 +02:00			`NEURON = enum.auto()`
[Hardware][openvino] is_openvino --> current_platform.is_openvino (#9716) 2024-10-26 18:59:06 +08:00			`OPENVINO = enum.auto()`
[Misc] Add a wrapper for torch.inference_mode (#6618) 2024-07-21 18:43:11 -07:00			`UNSPECIFIED = enum.auto()`
[hardware][misc] introduce platform abstraction (#6080) 2024-07-02 20:12:22 -07:00

[CI/Build] Avoid CUDA initialization (#8534) 2024-09-18 18:38:11 +08:00			`class DeviceCapability(NamedTuple):`
			`major: int`
			`minor: int`

			`def as_version_str(self) -> str:`
			`return f"{self.major}.{self.minor}"`

			`def to_int(self) -> int:`
			`"""`
			Express device capability as an integer ``<major><minor>``.

			`It is assumed that the minor version is always a single digit.`
			`"""`
			`assert 0 <= self.minor < 10`
			`return self.major * 10 + self.minor`


[hardware][misc] introduce platform abstraction (#6080) 2024-07-02 20:12:22 -07:00			`class Platform:`
			`_enum: PlatformEnum`

			`def is_cuda(self) -> bool:`
			`return self._enum == PlatformEnum.CUDA`

			`def is_rocm(self) -> bool:`
			`return self._enum == PlatformEnum.ROCM`

[Misc] Add a wrapper for torch.inference_mode (#6618) 2024-07-21 18:43:11 -07:00			`def is_tpu(self) -> bool:`
			`return self._enum == PlatformEnum.TPU`

[Hardware][Intel-Gaudi] Add Intel Gaudi (HPU) inference backend (#6143) Signed-off-by: yuwenzho <yuwen.zhou@intel.com> Signed-off-by: Chendi.Xue <chendi.xue@intel.com> Signed-off-by: Bob Zhu <bob.zhu@intel.com> Signed-off-by: zehao-intel <zehao.huang@intel.com> Signed-off-by: Konrad Zawora <kzawora@habana.ai> Co-authored-by: Kunshang Ji <kunshang.ji@intel.com> Co-authored-by: Sanju C Sudhakaran <scsudhakaran@habana.ai> Co-authored-by: Michal Adamczyk <madamczyk@habana.ai> Co-authored-by: Marceli Fylcek <mfylcek@habana.ai> Co-authored-by: Himangshu Lahkar <49579433+hlahkar@users.noreply.github.com> Co-authored-by: Vivek Goel <vgoel@habana.ai> Co-authored-by: yuwenzho <yuwen.zhou@intel.com> Co-authored-by: Dominika Olszewska <dolszewska@habana.ai> Co-authored-by: barak goldberg <149692267+bgoldberg-habana@users.noreply.github.com> Co-authored-by: Michal Szutenberg <37601244+szutenberg@users.noreply.github.com> Co-authored-by: Jan Kaniecki <jkaniecki@habana.ai> Co-authored-by: Agata Dobrzyniewicz <160237065+adobrzyniewicz-habana@users.noreply.github.com> Co-authored-by: Krzysztof Wisniewski <kwisniewski@habana.ai> Co-authored-by: Dudi Lester <160421192+dudilester@users.noreply.github.com> Co-authored-by: Ilia Taraban <tarabanil@gmail.com> Co-authored-by: Chendi.Xue <chendi.xue@intel.com> Co-authored-by: Michał Kuligowski <mkuligowski@habana.ai> Co-authored-by: Jakub Maksymczuk <jmaksymczuk@habana.ai> Co-authored-by: Tomasz Zielinski <85164140+tzielinski-habana@users.noreply.github.com> Co-authored-by: Sun Choi <schoi@habana.ai> Co-authored-by: Iryna Boiko <iboiko@habana.ai> Co-authored-by: Bob Zhu <41610754+czhu15@users.noreply.github.com> Co-authored-by: hlin99 <73271530+hlin99@users.noreply.github.com> Co-authored-by: Zehao Huang <zehao.huang@intel.com> Co-authored-by: Andrzej Kotłowski <Andrzej.Kotlowski@intel.com> Co-authored-by: Yan Tomsinsky <73292515+Yantom1@users.noreply.github.com> Co-authored-by: Nir David <ndavid@habana.ai> Co-authored-by: Yu-Zhou 
<yu.zhou@intel.com> Co-authored-by: Ruheena Suhani Shaik <rsshaik@habana.ai> Co-authored-by: Karol Damaszke <kdamaszke@habana.ai> Co-authored-by: Marcin Swiniarski <mswiniarski@habana.ai> Co-authored-by: Woosuk Kwon <woosuk.kwon@berkeley.edu> Co-authored-by: Jacek Czaja <jacek.czaja@intel.com> Co-authored-by: Jacek Czaja <jczaja@habana.ai> Co-authored-by: Yuan <yuan.zhou@outlook.com> 2024-11-06 10:09:10 +01:00			`def is_hpu(self) -> bool:`
			`return self._enum == PlatformEnum.HPU`

[Bugfix][Intel] Fix XPU Dockerfile Build (#7824) Signed-off-by: tylertitsworth <tyler.titsworth@intel.com> Co-authored-by: youkaichao <youkaichao@126.com> 2024-09-27 23:45:50 -07:00			`def is_xpu(self) -> bool:`
			`return self._enum == PlatformEnum.XPU`

[Hardware][Intel] Support compressed-tensor W8A8 for CPU backend (#7257) 2024-09-12 00:46:46 +08:00			`def is_cpu(self) -> bool:`
			`return self._enum == PlatformEnum.CPU`

[Neuron] [Bugfix] Fix neuron startup (#9374) Co-authored-by: Jerzy Zagorski <jzagorsk@amazon.com> 2024-10-22 14:51:41 +02:00			`def is_neuron(self) -> bool:`
			`return self._enum == PlatformEnum.NEURON`

[Hardware][openvino] is_openvino --> current_platform.is_openvino (#9716) 2024-10-26 18:59:06 +08:00			`def is_openvino(self) -> bool:`
			`return self._enum == PlatformEnum.OPENVINO`

[CI/Build] Avoid CUDA initialization (#8534) 2024-09-18 18:38:11 +08:00			`def is_cuda_alike(self) -> bool:`
			"""Stateless version of :func:`torch.cuda.is_available`."""
			`return self._enum in (PlatformEnum.CUDA, PlatformEnum.ROCM)`

			`@classmethod`
			`def get_device_capability(`
			`cls,`
			`device_id: int = 0,`
			`) -> Optional[DeviceCapability]:`
			"""Stateless version of :func:`torch.cuda.get_device_capability`."""
[Hardware][Intel] Support compressed-tensor W8A8 for CPU backend (#7257) 2024-09-12 00:46:46 +08:00			`return None`
[Misc] Add a wrapper for torch.inference_mode (#6618) 2024-07-21 18:43:11 -07:00
[CI/Build] Avoid CUDA initialization (#8534) 2024-09-18 18:38:11 +08:00			`@classmethod`
			`def has_device_capability(`
			`cls,`
			`capability: Union[Tuple[int, int], int],`
			`device_id: int = 0,`
			`) -> bool:`
			`"""`
			`Test whether this platform is compatible with a device capability.`

			The ``capability`` argument can either be:

			- A tuple ``(major, minor)``.
			- An integer ``<major><minor>``. (See :meth:`DeviceCapability.to_int`)
			`"""`
			`current_capability = cls.get_device_capability(device_id=device_id)`
			`if current_capability is None:`
			`return False`

			`if isinstance(capability, tuple):`
			`return current_capability >= capability`

			`return current_capability.to_int() >= capability`

			`@classmethod`
			`def get_device_name(cls, device_id: int = 0) -> str:`
[CI/Build] Add test decorator for minimum GPU memory (#8925) 2024-09-29 10:50:51 +08:00			`"""Get the name of a device."""`
			`raise NotImplementedError`

			`@classmethod`
			`def get_device_total_memory(cls, device_id: int = 0) -> int:`
			`"""Get the total memory of a device in bytes."""`
[misc] use nvml to get consistent device name (#7582) 2024-08-16 21:15:13 -07:00			`raise NotImplementedError`

[CI/Build] Avoid CUDA initialization (#8534) 2024-09-18 18:38:11 +08:00			`@classmethod`
			`def inference_mode(cls):`
[Misc] Add a wrapper for torch.inference_mode (#6618) 2024-07-21 18:43:11 -07:00			"""A device-specific wrapper of `torch.inference_mode`.

			`This wrapper is recommended because some hardware backends such as TPU`
			do not support `torch.inference_mode`. In such a case, they will fall
			back to `torch.no_grad` by overriding this method.
			`"""`
			`return torch.inference_mode(mode=True)`

[Hardware] using current_platform.seed_everything (#9785) Signed-off-by: wangshuai09 <391746016@qq.com> 2024-10-29 22:47:44 +08:00			`@classmethod`
			`def seed_everything(cls, seed: int) -> None:`
			`"""`
			`Set the seed of each random module.`
			`torch.manual_seed` will set seed on all devices.

			`Loosely based on: https://github.com/Lightning-AI/pytorch-lightning/blob/2.4.0/src/lightning/fabric/utilities/seed.py#L20`
			`"""`
			`random.seed(seed)`
			`np.random.seed(seed)`
			`torch.manual_seed(seed)`

[Misc] Add a wrapper for torch.inference_mode (#6618) 2024-07-21 18:43:11 -07:00
			`class UnspecifiedPlatform(Platform):`
			`_enum = PlatformEnum.UNSPECIFIED`