# Base stage: Intel deep-learning-essentials image with oneAPI tooling preinstalled.
FROM intel/deep-learning-essentials:2025.3.2-0-devel-ubuntu24.04 AS vllm-base

WORKDIR /workspace

# Python used for the uv-managed virtual environment below.
ARG PYTHON_VERSION=3.12
# PyTorch XPU wheel index, wired into pip/uv via ENV later in this stage.
ARG PIP_EXTRA_INDEX_URL="https://download.pytorch.org/whl/xpu"
# Register the Intel oneAPI apt repository (signed with Intel's GPG key) and the
# kobuk-team intel-graphics PPA that provides the Level Zero / OpenCL GPU stack.
RUN wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB | gpg --dearmor | tee /usr/share/keyrings/oneapi-archive-keyring.gpg > /dev/null && \
    echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" | tee /etc/apt/sources.list.d/oneAPI.list && \
    add-apt-repository -y ppa:kobuk-team/intel-graphics
# OS-level build/runtime dependencies (sorted alphabetically for diffability).
# `update` and `install` are combined in one layer to avoid stale apt caches.
RUN apt-get clean && apt-get update -y && \
    apt-get install -y --no-install-recommends --fix-missing \
        curl \
        ffmpeg \
        git \
        libaio-dev \
        libgl1 \
        libsm6 \
        libsndfile1 \
        libxext6 \
        lsb-release \
        numactl \
        python3.12 \
        python3.12-dev \
        python3-pip \
        vim \
        wget
# Intel GPU user-mode drivers (Level Zero, OpenCL ICD, ray tracing, ocloc) from
# the PPA added above, plus the DPC++ compiler from the oneAPI apt repo.
# apt-get is used instead of apt for stable scripting behavior (hadolint DL3027).
RUN apt-get update && apt-get upgrade -y && \
    apt-get install -y libze1 libze-dev libze-intel-gpu1 intel-opencl-icd libze-intel-gpu-raytracing intel-ocloc && \
    apt-get install -y intel-oneapi-compiler-dpcpp-cpp-2025.3
# Install uv and create the project virtual environment; putting the venv's bin
# directory first on PATH makes it the default Python for all later RUN steps.
ENV PATH="/root/.local/bin:$PATH"
ENV VIRTUAL_ENV="/opt/venv"
ENV UV_PYTHON_INSTALL_DIR=/opt/uv/python
RUN curl -LsSf https://astral.sh/uv/install.sh | sh
RUN uv venv --python ${PYTHON_VERSION} --seed ${VIRTUAL_ENV}
ENV PATH="$VIRTUAL_ENV/bin:$PATH"
# This oneccl contains the BMG support which is not the case for default version of oneapi 2025.2.
ARG ONECCL_INSTALLER="intel-oneccl-2021.15.7.8_offline.sh"
# Download, install, and remove the installer in one layer so the ~archive never
# persists in the image; oneAPI env sourcing is appended to .bashrc for
# interactive shells.
RUN wget "https://github.com/uxlfoundation/oneCCL/releases/download/2021.15.7/${ONECCL_INSTALLER}" && \
    bash "${ONECCL_INSTALLER}" -a --silent --eula accept && \
    rm "${ONECCL_INSTALLER}" && \
    echo "source /opt/intel/oneapi/setvars.sh --force" >> /root/.bashrc && \
    echo "source /opt/intel/oneapi/ccl/2021.15/env/vars.sh --force" >> /root/.bashrc

# Repoint the "latest" ccl symlink at the manually installed 2021.15 release so
# tools that resolve ccl/latest pick up the BMG-capable build.
RUN rm -f /opt/intel/oneapi/ccl/latest && \
    ln -s /opt/intel/oneapi/ccl/2021.15 /opt/intel/oneapi/ccl/latest
# Use bash for subsequent RUN steps (needed for `source` below); default CMD
# loads the oneAPI environment before handing over to an interactive shell.
SHELL ["bash", "-c"]
CMD ["bash", "-c", "source /root/.bashrc && exec bash"]
WORKDIR /workspace/vllm

ENV UV_HTTP_TIMEOUT=500
# Configure package index for XPU
ENV PIP_EXTRA_INDEX_URL=${PIP_EXTRA_INDEX_URL}
ENV UV_EXTRA_INDEX_URL=${PIP_EXTRA_INDEX_URL}
ENV UV_INDEX_STRATEGY="unsafe-best-match"
ENV UV_LINK_MODE="copy"
# Requirements files are bind-mounted (not COPYed) so changing other source
# files does not invalidate this dependency layer; the uv cache mount speeds
# up rebuilds without bloating the image.
RUN --mount=type=cache,target=/root/.cache/uv \
    --mount=type=bind,src=requirements/common.txt,target=/workspace/vllm/requirements/common.txt \
    --mount=type=bind,src=requirements/xpu.txt,target=/workspace/vllm/requirements/xpu.txt \
    uv pip install --upgrade pip && \
    uv pip install -r requirements/xpu.txt

# used for suffix method speculative decoding
# build deps for proto + nanobind-based extensions to set up the build environment
RUN --mount=type=cache,target=/root/.cache/uv \
    uv pip install grpcio-tools protobuf nanobind

# arctic-inference is built from source which needs torch-xpu properly installed first;
# the oneAPI environment is sourced so its compilers/libs are on PATH, and the
# venv's site-packages dir is prepended to CMAKE_PREFIX_PATH so CMake finds torch.
RUN --mount=type=cache,target=/root/.cache/uv \
    source /opt/intel/oneapi/setvars.sh --force && \
    source /opt/intel/oneapi/ccl/2021.15/env/vars.sh --force && \
    export CMAKE_PREFIX_PATH="$(python -c 'import site; print(site.getsitepackages()[0])'):${CMAKE_PREFIX_PATH}" && \
    uv pip install --no-build-isolation arctic-inference==0.1.1
ENV LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/usr/local/lib/"

COPY . .

# Optional repository sanity check; enabled by building with --build-arg GIT_REPO_CHECK=1.
ARG GIT_REPO_CHECK=0
RUN --mount=type=bind,source=.git,target=.git \
    if [ "$GIT_REPO_CHECK" != 0 ]; then bash tools/check_repo.sh; fi

# Target Intel XPU in the vLLM build.
ENV VLLM_TARGET_DEVICE=xpu
ENV VLLM_WORKER_MULTIPROC_METHOD=spawn

# Build and install vLLM itself; .git is bind-mounted so setuptools-scm style
# version detection works without baking the git history into the image.
RUN --mount=type=cache,target=/root/.cache/uv \
    --mount=type=bind,source=.git,target=.git \
    uv pip install --no-build-isolation .

CMD ["/bin/bash"]
FROM vllm-base AS vllm-openai
# install additional dependencies for openai api server
RUN --mount=type=cache,target=/root/.cache/uv \
    uv pip install accelerate hf_transfer pytest pytest_asyncio lm_eval[api] modelscope

# install development dependencies (for testing)
RUN uv pip install -e tests/vllm_test_utils

# install nixl from source code
ENV NIXL_VERSION=0.7.0
RUN python /workspace/vllm/tools/install_nixl_from_source_ubuntu.py

# FIX triton: replace whatever triton the dependency resolution pulled in with
# the pinned XPU build.
RUN --mount=type=cache,target=/root/.cache/uv \
    uv pip uninstall triton triton-xpu && \
    uv pip install triton-xpu==3.6.0

# remove torch bundled oneccl to avoid conflicts
RUN --mount=type=cache,target=/root/.cache/uv \
    uv pip uninstall oneccl oneccl-devel

ENTRYPOINT ["vllm", "serve"]