[Hardware][Intel] Support compressed-tensor W8A8 for CPU backend (#7257)
This commit is contained in:
@@ -42,6 +42,13 @@ try:
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Detect a CPU build from the version string of the installed "vllm"
# distribution. The flag starts out False and is only overwritten when the
# probe below succeeds.
is_cpu = False
try:
    from importlib.metadata import version

    # True when the reported version string contains the substring "cpu".
    is_cpu = "cpu" in version("vllm")
except Exception:
    # Best-effort probe: on any failure (for example when the distribution
    # metadata cannot be looked up) is_cpu simply stays False.
    pass
|
||||
|
||||
if is_tpu:
|
||||
# people might install pytorch built with cuda but run on tpu
|
||||
# so we need to check tpu first
|
||||
@@ -53,6 +60,9 @@ elif is_cuda:
|
||||
elif is_rocm:
|
||||
from .rocm import RocmPlatform
|
||||
current_platform = RocmPlatform()
|
||||
elif is_cpu:
|
||||
from .cpu import CpuPlatform
|
||||
current_platform = CpuPlatform()
|
||||
else:
|
||||
current_platform = UnspecifiedPlatform()
|
||||
|
||||
|
||||
15
vllm/platforms/cpu.py
Normal file
15
vllm/platforms/cpu.py
Normal file
@@ -0,0 +1,15 @@
|
||||
import torch
|
||||
|
||||
from .interface import Platform, PlatformEnum
|
||||
|
||||
|
||||
class CpuPlatform(Platform):
    """Platform implementation tagged as ``PlatformEnum.CPU``."""

    _enum = PlatformEnum.CPU

    @staticmethod
    def get_device_name(device_id: int = 0) -> str:
        """Return the device name, which is always ``"cpu"``.

        ``device_id`` is accepted for signature compatibility but is not
        used by this implementation.
        """
        return "cpu"

    @staticmethod
    def inference_mode():
        """Return a new ``torch.no_grad()`` context manager."""
        # NOTE(review): no_grad() is used here rather than
        # torch.inference_mode(); the reason is not visible in this file,
        # so confirm before changing it.
        return torch.no_grad()
|
||||
@@ -1,5 +1,5 @@
|
||||
import enum
|
||||
from typing import Tuple
|
||||
from typing import Optional, Tuple
|
||||
|
||||
import torch
|
||||
|
||||
@@ -8,6 +8,7 @@ class PlatformEnum(enum.Enum):
|
||||
CUDA = enum.auto()
|
||||
ROCM = enum.auto()
|
||||
TPU = enum.auto()
|
||||
CPU = enum.auto()
|
||||
UNSPECIFIED = enum.auto()
|
||||
|
||||
|
||||
@@ -23,9 +24,12 @@ class Platform:
|
||||
def is_tpu(self) -> bool:
|
||||
return self._enum == PlatformEnum.TPU
|
||||
|
||||
def is_cpu(self) -> bool:
|
||||
return self._enum == PlatformEnum.CPU
|
||||
|
||||
@staticmethod
|
||||
def get_device_capability(device_id: int = 0) -> Tuple[int, int]:
|
||||
raise NotImplementedError
|
||||
def get_device_capability(device_id: int = 0) -> Optional[Tuple[int, int]]:
|
||||
return None
|
||||
|
||||
@staticmethod
|
||||
def get_device_name(device_id: int = 0) -> str:
|
||||
|
||||
@@ -1,5 +1,3 @@
|
||||
from typing import Tuple
|
||||
|
||||
import torch
|
||||
|
||||
from .interface import Platform, PlatformEnum
|
||||
@@ -8,10 +6,6 @@ from .interface import Platform, PlatformEnum
|
||||
class TpuPlatform(Platform):
|
||||
_enum = PlatformEnum.TPU
|
||||
|
||||
@staticmethod
|
||||
def get_device_capability(device_id: int = 0) -> Tuple[int, int]:
|
||||
raise RuntimeError("TPU does not have device capability.")
|
||||
|
||||
@staticmethod
|
||||
def inference_mode():
|
||||
return torch.no_grad()
|
||||
|
||||
Reference in New Issue
Block a user