[Hardware][Intel-Gaudi] Add Intel Gaudi (HPU) inference backend (#6143)

Signed-off-by: yuwenzho <yuwen.zhou@intel.com>
Signed-off-by: Chendi.Xue <chendi.xue@intel.com>
Signed-off-by: Bob Zhu <bob.zhu@intel.com>
Signed-off-by: zehao-intel <zehao.huang@intel.com>
Signed-off-by: Konrad Zawora <kzawora@habana.ai>
Co-authored-by: Kunshang Ji <kunshang.ji@intel.com>
Co-authored-by: Sanju C Sudhakaran <scsudhakaran@habana.ai>
Co-authored-by: Michal Adamczyk <madamczyk@habana.ai>
Co-authored-by: Marceli Fylcek <mfylcek@habana.ai>
Co-authored-by: Himangshu Lahkar <49579433+hlahkar@users.noreply.github.com>
Co-authored-by: Vivek Goel <vgoel@habana.ai>
Co-authored-by: yuwenzho <yuwen.zhou@intel.com>
Co-authored-by: Dominika Olszewska <dolszewska@habana.ai>
Co-authored-by: barak goldberg <149692267+bgoldberg-habana@users.noreply.github.com>
Co-authored-by: Michal Szutenberg <37601244+szutenberg@users.noreply.github.com>
Co-authored-by: Jan Kaniecki <jkaniecki@habana.ai>
Co-authored-by: Agata Dobrzyniewicz <160237065+adobrzyniewicz-habana@users.noreply.github.com>
Co-authored-by: Krzysztof Wisniewski <kwisniewski@habana.ai>
Co-authored-by: Dudi Lester <160421192+dudilester@users.noreply.github.com>
Co-authored-by: Ilia Taraban <tarabanil@gmail.com>
Co-authored-by: Chendi.Xue <chendi.xue@intel.com>
Co-authored-by: Michał Kuligowski <mkuligowski@habana.ai>
Co-authored-by: Jakub Maksymczuk <jmaksymczuk@habana.ai>
Co-authored-by: Tomasz Zielinski <85164140+tzielinski-habana@users.noreply.github.com>
Co-authored-by: Sun Choi <schoi@habana.ai>
Co-authored-by: Iryna Boiko <iboiko@habana.ai>
Co-authored-by: Bob Zhu <41610754+czhu15@users.noreply.github.com>
Co-authored-by: hlin99 <73271530+hlin99@users.noreply.github.com>
Co-authored-by: Zehao Huang <zehao.huang@intel.com>
Co-authored-by: Andrzej Kotłowski <Andrzej.Kotlowski@intel.com>
Co-authored-by: Yan Tomsinsky <73292515+Yantom1@users.noreply.github.com>
Co-authored-by: Nir David <ndavid@habana.ai>
Co-authored-by: Yu-Zhou <yu.zhou@intel.com>
Co-authored-by: Ruheena Suhani Shaik <rsshaik@habana.ai>
Co-authored-by: Karol Damaszke <kdamaszke@habana.ai>
Co-authored-by: Marcin Swiniarski <mswiniarski@habana.ai>
Co-authored-by: Woosuk Kwon <woosuk.kwon@berkeley.edu>
Co-authored-by: Jacek Czaja <jacek.czaja@intel.com>
Co-authored-by: Jacek Czaja <jczaja@habana.ai>
Co-authored-by: Yuan <yuan.zhou@outlook.com>
This commit is contained in:
Konrad Zawora
2024-11-06 10:09:10 +01:00
committed by GitHub
parent a5fda50a10
commit a02a50e6e5
31 changed files with 4279 additions and 20 deletions

View File

@@ -253,6 +253,24 @@ class cmake_build_ext(build_ext):
self.copy_file(file, dst_file)
def _is_hpu() -> bool:
is_hpu_available = True
try:
subprocess.run(["hl-smi"], capture_output=True, check=True)
except (FileNotFoundError, PermissionError, subprocess.CalledProcessError):
if not os.path.exists('/dev/accel/accel0') and not os.path.exists(
'/dev/accel/accel_controlD0'):
# last resort...
try:
output = subprocess.check_output(
'lsmod | grep habanalabs | wc -l', shell=True)
is_hpu_available = int(output) > 0
except (ValueError, FileNotFoundError, PermissionError,
subprocess.CalledProcessError):
is_hpu_available = False
return is_hpu_available or VLLM_TARGET_DEVICE == "hpu"
def _no_device() -> bool:
return VLLM_TARGET_DEVICE == "empty"
@@ -260,7 +278,7 @@ def _no_device() -> bool:
def _is_cuda() -> bool:
has_cuda = torch.version.cuda is not None
return (VLLM_TARGET_DEVICE == "cuda" and has_cuda
and not (_is_neuron() or _is_tpu()))
and not (_is_neuron() or _is_tpu() or _is_hpu()))
def _is_hip() -> bool:
@@ -356,6 +374,23 @@ def get_path(*filepath) -> str:
return os.path.join(ROOT_DIR, *filepath)
def get_gaudi_sw_version():
"""
Returns the driver version.
"""
# Enable console printing for `hl-smi` check
output = subprocess.run("hl-smi",
shell=True,
text=True,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
env={"ENABLE_CONSOLE": "true"})
if output.returncode == 0 and output.stdout:
return output.stdout.split("\n")[2].replace(
" ", "").split(":")[1][:-1].split("-")[0]
return "0.0.0" # when hl-smi is not available
def get_vllm_version() -> str:
version = get_version(
write_to="vllm/_version.py", # TODO: move this to pyproject.toml
@@ -385,6 +420,12 @@ def get_vllm_version() -> str:
if neuron_version != MAIN_CUDA_VERSION:
neuron_version_str = neuron_version.replace(".", "")[:3]
version += f"{sep}neuron{neuron_version_str}"
elif _is_hpu():
# Get the Intel Gaudi Software Suite version
gaudi_sw_version = str(get_gaudi_sw_version())
if gaudi_sw_version != MAIN_CUDA_VERSION:
gaudi_sw_version = gaudi_sw_version.replace(".", "")[:3]
version += f"{sep}gaudi{gaudi_sw_version}"
elif _is_openvino():
version += f"{sep}openvino"
elif _is_tpu():
@@ -443,6 +484,8 @@ def get_requirements() -> List[str]:
requirements = _read_requirements("requirements-rocm.txt")
elif _is_neuron():
requirements = _read_requirements("requirements-neuron.txt")
elif _is_hpu():
requirements = _read_requirements("requirements-hpu.txt")
elif _is_openvino():
requirements = _read_requirements("requirements-openvino.txt")
elif _is_tpu():
@@ -453,7 +496,7 @@ def get_requirements() -> List[str]:
requirements = _read_requirements("requirements-xpu.txt")
else:
raise ValueError(
"Unsupported platform, please use CUDA, ROCm, Neuron, "
"Unsupported platform, please use CUDA, ROCm, Neuron, HPU, "
"OpenVINO, or CPU.")
return requirements