[Hardware][Intel-Gaudi] Add Intel Gaudi (HPU) inference backend (#6143)
Signed-off-by: yuwenzho <yuwen.zhou@intel.com> Signed-off-by: Chendi.Xue <chendi.xue@intel.com> Signed-off-by: Bob Zhu <bob.zhu@intel.com> Signed-off-by: zehao-intel <zehao.huang@intel.com> Signed-off-by: Konrad Zawora <kzawora@habana.ai> Co-authored-by: Kunshang Ji <kunshang.ji@intel.com> Co-authored-by: Sanju C Sudhakaran <scsudhakaran@habana.ai> Co-authored-by: Michal Adamczyk <madamczyk@habana.ai> Co-authored-by: Marceli Fylcek <mfylcek@habana.ai> Co-authored-by: Himangshu Lahkar <49579433+hlahkar@users.noreply.github.com> Co-authored-by: Vivek Goel <vgoel@habana.ai> Co-authored-by: yuwenzho <yuwen.zhou@intel.com> Co-authored-by: Dominika Olszewska <dolszewska@habana.ai> Co-authored-by: barak goldberg <149692267+bgoldberg-habana@users.noreply.github.com> Co-authored-by: Michal Szutenberg <37601244+szutenberg@users.noreply.github.com> Co-authored-by: Jan Kaniecki <jkaniecki@habana.ai> Co-authored-by: Agata Dobrzyniewicz <160237065+adobrzyniewicz-habana@users.noreply.github.com> Co-authored-by: Krzysztof Wisniewski <kwisniewski@habana.ai> Co-authored-by: Dudi Lester <160421192+dudilester@users.noreply.github.com> Co-authored-by: Ilia Taraban <tarabanil@gmail.com> Co-authored-by: Chendi.Xue <chendi.xue@intel.com> Co-authored-by: Michał Kuligowski <mkuligowski@habana.ai> Co-authored-by: Jakub Maksymczuk <jmaksymczuk@habana.ai> Co-authored-by: Tomasz Zielinski <85164140+tzielinski-habana@users.noreply.github.com> Co-authored-by: Sun Choi <schoi@habana.ai> Co-authored-by: Iryna Boiko <iboiko@habana.ai> Co-authored-by: Bob Zhu <41610754+czhu15@users.noreply.github.com> Co-authored-by: hlin99 <73271530+hlin99@users.noreply.github.com> Co-authored-by: Zehao Huang <zehao.huang@intel.com> Co-authored-by: Andrzej Kotłowski <Andrzej.Kotlowski@intel.com> Co-authored-by: Yan Tomsinsky <73292515+Yantom1@users.noreply.github.com> Co-authored-by: Nir David <ndavid@habana.ai> Co-authored-by: Yu-Zhou <yu.zhou@intel.com> Co-authored-by: Ruheena Suhani Shaik <rsshaik@habana.ai> 
Co-authored-by: Karol Damaszke <kdamaszke@habana.ai> Co-authored-by: Marcin Swiniarski <mswiniarski@habana.ai> Co-authored-by: Woosuk Kwon <woosuk.kwon@berkeley.edu> Co-authored-by: Jacek Czaja <jacek.czaja@intel.com> Co-authored-by: Jacek Czaja <jczaja@habana.ai> Co-authored-by: Yuan <yuan.zhou@outlook.com>
This commit is contained in:
47
setup.py
47
setup.py
@@ -253,6 +253,24 @@ class cmake_build_ext(build_ext):
|
||||
self.copy_file(file, dst_file)
|
||||
|
||||
|
||||
def _is_hpu() -> bool:
    """Return True when an Intel Gaudi (HPU) device is detected, or when
    the build explicitly targets HPU via VLLM_TARGET_DEVICE.

    Detection cascades through three probes, cheapest first:
    1. ``hl-smi`` (the Gaudi management tool) runs successfully;
    2. a Habana accelerator device node exists under ``/dev/accel``;
    3. the ``habanalabs`` kernel module shows up in ``lsmod``.
    """
    detected = True
    try:
        # Succeeds (exit 0) only when the Gaudi userspace stack is installed.
        subprocess.run(["hl-smi"], capture_output=True, check=True)
    except (FileNotFoundError, PermissionError, subprocess.CalledProcessError):
        node_present = (os.path.exists('/dev/accel/accel0')
                        or os.path.exists('/dev/accel/accel_controlD0'))
        if not node_present:
            # last resort...
            try:
                module_count = subprocess.check_output(
                    'lsmod | grep habanalabs | wc -l', shell=True)
                detected = int(module_count) > 0
            except (ValueError, FileNotFoundError, PermissionError,
                    subprocess.CalledProcessError):
                detected = False
    return detected or VLLM_TARGET_DEVICE == "hpu"
|
||||
|
||||
|
||||
def _no_device() -> bool:
    """Return True when the build targets no device at all (``"empty"``)."""
    target_is_empty = VLLM_TARGET_DEVICE == "empty"
    return target_is_empty
|
||||
|
||||
@@ -260,7 +278,7 @@ def _no_device() -> bool:
|
||||
def _is_cuda() -> bool:
    """Return True when building for CUDA: torch must ship a CUDA runtime,
    the target device must be ``"cuda"``, and no other accelerator backend
    (Neuron, TPU, HPU) may be detected."""
    if VLLM_TARGET_DEVICE != "cuda":
        return False
    if torch.version.cuda is None:
        return False
    other_backend = _is_neuron() or _is_tpu() or _is_hpu()
    return not other_backend
|
||||
|
||||
|
||||
def _is_hip() -> bool:
|
||||
@@ -356,6 +374,23 @@ def get_path(*filepath) -> str:
|
||||
return os.path.join(ROOT_DIR, *filepath)
|
||||
|
||||
|
||||
def get_gaudi_sw_version() -> str:
    """Return the Intel Gaudi software-suite (driver) version.

    Runs ``hl-smi`` and parses the version out of its banner line.
    Returns:
        The version string (e.g. ``"1.17.0"``), or ``"0.0.0"`` when
        ``hl-smi`` is unavailable or its output cannot be parsed.
    """
    # Enable console printing for `hl-smi` check. NOTE: augment the current
    # environment instead of replacing it — `env={...}` alone would drop
    # PATH/HOME etc., which can prevent the shell from finding hl-smi.
    output = subprocess.run("hl-smi",
                            shell=True,
                            text=True,
                            stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE,
                            env={
                                **os.environ, "ENABLE_CONSOLE": "true"
                            })
    if output.returncode == 0 and output.stdout:
        try:
            # Banner line 3 looks like "... Driver Version: X.Y.Z-build ...";
            # strip spaces, take the value after ':', drop trailing char and
            # the "-build" suffix.
            return output.stdout.split("\n")[2].replace(
                " ", "").split(":")[1][:-1].split("-")[0]
        except IndexError:
            # Unexpected banner layout — fall through to the sentinel below
            # rather than crashing the build.
            pass
    return "0.0.0"  # when hl-smi is not available
|
||||
|
||||
|
||||
def get_vllm_version() -> str:
|
||||
version = get_version(
|
||||
write_to="vllm/_version.py", # TODO: move this to pyproject.toml
|
||||
@@ -385,6 +420,12 @@ def get_vllm_version() -> str:
|
||||
if neuron_version != MAIN_CUDA_VERSION:
|
||||
neuron_version_str = neuron_version.replace(".", "")[:3]
|
||||
version += f"{sep}neuron{neuron_version_str}"
|
||||
elif _is_hpu():
|
||||
# Get the Intel Gaudi Software Suite version
|
||||
gaudi_sw_version = str(get_gaudi_sw_version())
|
||||
if gaudi_sw_version != MAIN_CUDA_VERSION:
|
||||
gaudi_sw_version = gaudi_sw_version.replace(".", "")[:3]
|
||||
version += f"{sep}gaudi{gaudi_sw_version}"
|
||||
elif _is_openvino():
|
||||
version += f"{sep}openvino"
|
||||
elif _is_tpu():
|
||||
@@ -443,6 +484,8 @@ def get_requirements() -> List[str]:
|
||||
requirements = _read_requirements("requirements-rocm.txt")
|
||||
elif _is_neuron():
|
||||
requirements = _read_requirements("requirements-neuron.txt")
|
||||
elif _is_hpu():
|
||||
requirements = _read_requirements("requirements-hpu.txt")
|
||||
elif _is_openvino():
|
||||
requirements = _read_requirements("requirements-openvino.txt")
|
||||
elif _is_tpu():
|
||||
@@ -453,7 +496,7 @@ def get_requirements() -> List[str]:
|
||||
requirements = _read_requirements("requirements-xpu.txt")
|
||||
else:
|
||||
raise ValueError(
|
||||
"Unsupported platform, please use CUDA, ROCm, Neuron, "
|
||||
"Unsupported platform, please use CUDA, ROCm, Neuron, HPU, "
|
||||
"OpenVINO, or CPU.")
|
||||
return requirements
|
||||
|
||||
|
||||
Reference in New Issue
Block a user