[CPU] Improve CPU Docker build (#30953)

Signed-off-by: Maryam Tahhan <mtahhan@redhat.com>
Co-authored-by: Li, Jiang <jiang1.li@intel.com>
This commit is contained in:
Maryam Tahhan
2026-01-24 17:08:24 +00:00
committed by GitHub
parent 17ab54de81
commit 203d0bc0c2
3 changed files with 123 additions and 17 deletions

View File

@@ -13,6 +13,8 @@ endif()
#
# Define environment variables for special configurations
#
# Each ENABLE_<ISA> variable mirrors the matching VLLM_CPU_<ISA> environment
# variable. $ENV{} is read when CMake configures the project, so changing the
# environment afterwards has no effect until the project is re-configured.
# When the environment variable is unset the expansion is empty, which leaves
# the ENABLE_<ISA> variable unset and therefore false in if() checks.
# ENABLE_AVX2 and ENABLE_AVX512 are consumed by the cross-compilation override
# that forces AVX2_FOUND / AVX512_FOUND to ON.
set(ENABLE_AVX2 $ENV{VLLM_CPU_AVX2})
set(ENABLE_AVX512 $ENV{VLLM_CPU_AVX512})
set(ENABLE_AVX512BF16 $ENV{VLLM_CPU_AVX512BF16})
set(ENABLE_AVX512VNNI $ENV{VLLM_CPU_AVX512VNNI})
set(ENABLE_AMXBF16 $ENV{VLLM_CPU_AMXBF16})
@@ -103,6 +105,16 @@ else()
find_isa(${CPUINFO} "bf16" ARM_BF16_FOUND) # Check for ARM BF16 support
find_isa(${CPUINFO} "S390" S390_FOUND)
find_isa(${CPUINFO} "v" RVV_FOUND) # Check for RISC-V RVV support
# Cross-compilation hook: the build host may not expose the ISA of the machine
# the binaries will run on, so a truthy VLLM_CPU_AVX2 / VLLM_CPU_AVX512
# (mirrored into ENABLE_AVX2 / ENABLE_AVX512) marks that ISA as found no
# matter what the detection above reported. A falsy or unset value leaves the
# detected result untouched.
if(ENABLE_AVX2)
  message(STATUS "AVX2 support enabled via VLLM_CPU_AVX2 environment variable")
  set(AVX2_FOUND ON)
endif()

if(ENABLE_AVX512)
  message(STATUS "AVX512 support enabled via VLLM_CPU_AVX512 environment variable")
  set(AVX512_FOUND ON)
endif()
endif()
if (AVX512_FOUND AND NOT AVX512_DISABLED)

View File

@@ -15,9 +15,11 @@
# Build arguments:
# PYTHON_VERSION=3.13|3.12 (default)|3.11|3.10
# VLLM_CPU_DISABLE_AVX512=false (default)|true
# VLLM_CPU_AVX512BF16=false (default)|true
# VLLM_CPU_AVX512VNNI=false (default)|true
# VLLM_CPU_AMXBF16=false |true (default)
# VLLM_CPU_AVX2=false (default)|true (for cross-compilation)
# VLLM_CPU_AVX512=false (default)|true (for cross-compilation)
# VLLM_CPU_AVX512BF16=false (default)|true (for cross-compilation)
# VLLM_CPU_AVX512VNNI=false (default)|true (for cross-compilation)
# VLLM_CPU_AMXBF16=false (default)|true (for cross-compilation)
#
######################### COMMON BASE IMAGE #########################
@@ -54,9 +56,12 @@ ENV PIP_EXTRA_INDEX_URL=${PIP_EXTRA_INDEX_URL}
ENV UV_EXTRA_INDEX_URL=${PIP_EXTRA_INDEX_URL}
ENV UV_INDEX_STRATEGY="unsafe-best-match"
ENV UV_LINK_MODE="copy"
# Copy requirements files for installation
COPY requirements/common.txt requirements/common.txt
COPY requirements/cpu.txt requirements/cpu.txt
RUN --mount=type=cache,target=/root/.cache/uv \
--mount=type=bind,src=requirements/common.txt,target=requirements/common.txt \
--mount=type=bind,src=requirements/cpu.txt,target=requirements/cpu.txt \
uv pip install --upgrade pip && \
uv pip install -r requirements/cpu.txt
@@ -88,6 +93,12 @@ ARG GIT_REPO_CHECK=0
# Support for building with non-AVX512 vLLM: docker build --build-arg VLLM_CPU_DISABLE_AVX512="true" ...
ARG VLLM_CPU_DISABLE_AVX512=0
ENV VLLM_CPU_DISABLE_AVX512=${VLLM_CPU_DISABLE_AVX512}
# Support for cross-compilation with AVX2 ISA: docker build --build-arg VLLM_CPU_AVX2="1" ...
ARG VLLM_CPU_AVX2=0
ENV VLLM_CPU_AVX2=${VLLM_CPU_AVX2}
# Support for cross-compilation with AVX512 ISA: docker build --build-arg VLLM_CPU_AVX512="1" ...
ARG VLLM_CPU_AVX512=0
ENV VLLM_CPU_AVX512=${VLLM_CPU_AVX512}
# Support for building with AVX512BF16 ISA: docker build --build-arg VLLM_CPU_AVX512BF16="true" ...
ARG VLLM_CPU_AVX512BF16=0
ENV VLLM_CPU_AVX512BF16=${VLLM_CPU_AVX512BF16}
@@ -100,18 +111,19 @@ ENV VLLM_CPU_AMXBF16=${VLLM_CPU_AMXBF16}
WORKDIR /workspace/vllm
# Copy build requirements
COPY requirements/cpu-build.txt requirements/build.txt
RUN --mount=type=cache,target=/root/.cache/uv \
--mount=type=bind,src=requirements/cpu-build.txt,target=requirements/build.txt \
uv pip install -r requirements/build.txt
COPY . .
RUN --mount=type=bind,source=.git,target=.git \
if [ "$GIT_REPO_CHECK" != 0 ]; then bash tools/check_repo.sh ; fi
RUN if [ "$GIT_REPO_CHECK" != 0 ]; then bash tools/check_repo.sh ; fi
RUN --mount=type=cache,target=/root/.cache/uv \
--mount=type=cache,target=/root/.cache/ccache \
--mount=type=cache,target=/workspace/vllm/.deps,sharing=locked \
--mount=type=bind,source=.git,target=.git \
VLLM_TARGET_DEVICE=cpu python3 setup.py bdist_wheel --dist-dir=dist --py-limited-api=cp38
######################### TEST DEPS #########################
@@ -119,9 +131,11 @@ FROM base AS vllm-test-deps
WORKDIR /workspace/vllm
# Copy test requirements
COPY requirements/test.in requirements/cpu-test.in
# TODO: Update to 2.9.0 when there is a new build for intel_extension_for_pytorch for that version
RUN --mount=type=bind,src=requirements/test.in,target=requirements/test.in \
cp requirements/test.in requirements/cpu-test.in && \
RUN \
sed -i '/mamba_ssm/d' requirements/cpu-test.in && \
remove_packages_not_supported_on_aarch64() { \
case "$(uname -m)" in \
@@ -200,4 +214,29 @@ RUN --mount=type=cache,target=/root/.cache/uv \
--mount=type=bind,from=vllm-build,src=/workspace/vllm/dist,target=dist \
uv pip install dist/*.whl
# Add labels to document build configuration
LABEL org.opencontainers.image.title="vLLM CPU"
LABEL org.opencontainers.image.description="vLLM inference engine for CPU platforms"
LABEL org.opencontainers.image.vendor="vLLM Project"
LABEL org.opencontainers.image.source="https://github.com/vllm-project/vllm"
# Build configuration labels
# The ARG instructions below are declared without a default in this stage, so
# each one is empty unless a value reaches this stage (for example through
# --build-arg). NOTE(review): TARGETARCH is normally populated by BuildKit and
# PYTHON_VERSION may inherit a default declared before the first FROM; confirm
# against the top of this Dockerfile.
ARG TARGETARCH
ARG VLLM_CPU_DISABLE_AVX512
ARG VLLM_CPU_AVX2
ARG VLLM_CPU_AVX512
ARG VLLM_CPU_AVX512BF16
ARG VLLM_CPU_AVX512VNNI
ARG VLLM_CPU_AMXBF16
ARG PYTHON_VERSION
# The labels record the build arguments that were requested, not the
# instruction sets the CMake build auto-detected on the build host.
# ${VAR:-fallback} substitutes the fallback when VAR is unset or empty, so the
# ISA labels read "false" and the Python label reads "3.12" when nothing was
# passed. target-arch has no fallback and is empty if TARGETARCH is not set.
LABEL ai.vllm.build.target-arch="${TARGETARCH}"
LABEL ai.vllm.build.cpu-disable-avx512="${VLLM_CPU_DISABLE_AVX512:-false}"
LABEL ai.vllm.build.cpu-avx2="${VLLM_CPU_AVX2:-false}"
LABEL ai.vllm.build.cpu-avx512="${VLLM_CPU_AVX512:-false}"
LABEL ai.vllm.build.cpu-avx512bf16="${VLLM_CPU_AVX512BF16:-false}"
LABEL ai.vllm.build.cpu-avx512vnni="${VLLM_CPU_AVX512VNNI:-false}"
LABEL ai.vllm.build.cpu-amxbf16="${VLLM_CPU_AMXBF16:-false}"
LABEL ai.vllm.build.python-version="${PYTHON_VERSION:-3.12}"
ENTRYPOINT ["vllm", "serve"]

View File

@@ -164,21 +164,76 @@ uv pip install dist/*.whl
[https://gallery.ecr.aws/q9t5s3a7/vllm-cpu-release-repo](https://gallery.ecr.aws/q9t5s3a7/vllm-cpu-release-repo)
!!! warning
If deploying the pre-built images on machines without `avx512f`, `avx512_bf16`, or `avx512_vnni` support, an `Illegal instruction` error may be raised. It is recommended to build images for these machines with the appropriate build arguments (e.g., `--build-arg VLLM_CPU_DISABLE_AVX512=true`, `--build-arg VLLM_CPU_AVX512BF16=false`, or `--build-arg VLLM_CPU_AVX512VNNI=false`) to disable unsupported features. Please note that without `avx512f`, AVX2 will be used and this version is not recommended because it only has basic feature support.
If deploying the pre-built images on machines without `avx512f`, `avx512_bf16`, or `avx512_vnni` support, an `Illegal instruction` error may be raised. See the build-image-from-source section below for build arguments to match your target CPU capabilities.
# --8<-- [end:pre-built-images]
# --8<-- [start:build-image-from-source]
## Building for your target CPU
vLLM supports building Docker images for x86 CPU platforms with automatic instruction set detection.
### Basic build command
```bash
docker build -f docker/Dockerfile.cpu \
--build-arg VLLM_CPU_AVX512BF16=false (default)|true \
--build-arg VLLM_CPU_AVX512VNNI=false (default)|true \
--build-arg VLLM_CPU_AMXBF16=false|true (default) \
--build-arg VLLM_CPU_DISABLE_AVX512=false (default)|true \
--build-arg VLLM_CPU_DISABLE_AVX512=<false (default)|true> \
--build-arg VLLM_CPU_AVX2=<false (default)|true> \
--build-arg VLLM_CPU_AVX512=<false (default)|true> \
--build-arg VLLM_CPU_AVX512BF16=<false (default)|true> \
--build-arg VLLM_CPU_AVX512VNNI=<false (default)|true> \
--build-arg VLLM_CPU_AMXBF16=<false|true (default)> \
--tag vllm-cpu-env \
--target vllm-openai .
```
# Launching OpenAI server
!!! note "Instruction set auto-detection"
By default, vLLM will auto-detect CPU instruction sets (AVX512, AVX2, etc.) from the build system's CPU flags. Build arguments like `VLLM_CPU_AVX2`, `VLLM_CPU_AVX512`, `VLLM_CPU_AVX512BF16`, `VLLM_CPU_AVX512VNNI`, and `VLLM_CPU_AMXBF16` are primarily used for **cross-compilation** or for building container images on systems that don't have the target platform's ISA:
- Set `VLLM_CPU_{ISA}=true` to force-enable an instruction set (for cross-compilation to target platforms with that ISA)
- Set `VLLM_CPU_{ISA}=false` to rely on auto-detection
- When an ISA build arg is set to `true`, vLLM will build with that instruction set regardless of the build system's CPU capabilities
### Build examples
**Example 1: Auto-detection (native build)**
Build on a machine with the same CPU as your target deployment:
```bash
# Auto-detects all CPU features from the build system
docker build -f docker/Dockerfile.cpu \
--tag vllm-cpu-env \
--target vllm-openai .
```
**Example 2: Cross-compilation for AVX512 deployment**
Build an AVX512 image on any x86_64 system (even without AVX512):
```bash
docker build -f docker/Dockerfile.cpu \
--build-arg VLLM_CPU_AVX512=true \
--build-arg VLLM_CPU_AVX512BF16=true \
--build-arg VLLM_CPU_AVX512VNNI=true \
--tag vllm-cpu-avx512 \
--target vllm-openai .
```
**Example 3: Cross-compilation for AVX2 deployment**
Build an AVX2 image for older CPUs:
```bash
docker build -f docker/Dockerfile.cpu \
--build-arg VLLM_CPU_AVX2=true \
--tag vllm-cpu-avx2 \
--target vllm-openai .
```
## Launching the OpenAI server
```bash
docker run --rm \
--security-opt seccomp=unconfined \
--cap-add SYS_NICE \