diff --git a/cmake/cpu_extension.cmake b/cmake/cpu_extension.cmake index a50731d70..6da4f6c0c 100644 --- a/cmake/cpu_extension.cmake +++ b/cmake/cpu_extension.cmake @@ -13,6 +13,8 @@ endif() # # Define environment variables for special configurations # +set(ENABLE_AVX2 $ENV{VLLM_CPU_AVX2}) +set(ENABLE_AVX512 $ENV{VLLM_CPU_AVX512}) set(ENABLE_AVX512BF16 $ENV{VLLM_CPU_AVX512BF16}) set(ENABLE_AVX512VNNI $ENV{VLLM_CPU_AVX512VNNI}) set(ENABLE_AMXBF16 $ENV{VLLM_CPU_AMXBF16}) @@ -103,6 +105,16 @@ else() find_isa(${CPUINFO} "bf16" ARM_BF16_FOUND) # Check for ARM BF16 support find_isa(${CPUINFO} "S390" S390_FOUND) find_isa(${CPUINFO} "v" RVV_FOUND) # Check for RISC-V RVV support + + # Support cross-compilation by allowing override via environment variables + if (ENABLE_AVX2) + set(AVX2_FOUND ON) + message(STATUS "AVX2 support enabled via VLLM_CPU_AVX2 environment variable") + endif() + if (ENABLE_AVX512) + set(AVX512_FOUND ON) + message(STATUS "AVX512 support enabled via VLLM_CPU_AVX512 environment variable") + endif() endif() if (AVX512_FOUND AND NOT AVX512_DISABLED) diff --git a/docker/Dockerfile.cpu b/docker/Dockerfile.cpu index 9bf52d21d..bd6f75078 100644 --- a/docker/Dockerfile.cpu +++ b/docker/Dockerfile.cpu @@ -15,9 +15,11 @@ # Build arguments: # PYTHON_VERSION=3.13|3.12 (default)|3.11|3.10 # VLLM_CPU_DISABLE_AVX512=false (default)|true -# VLLM_CPU_AVX512BF16=false (default)|true -# VLLM_CPU_AVX512VNNI=false (default)|true -# VLLM_CPU_AMXBF16=false |true (default) +# VLLM_CPU_AVX2=false (default)|true (for cross-compilation) +# VLLM_CPU_AVX512=false (default)|true (for cross-compilation) +# VLLM_CPU_AVX512BF16=false (default)|true (for cross-compilation) +# VLLM_CPU_AVX512VNNI=false (default)|true (for cross-compilation) +# VLLM_CPU_AMXBF16=false (default)|true (for cross-compilation) # ######################### COMMON BASE IMAGE ######################### @@ -54,9 +56,12 @@ ENV PIP_EXTRA_INDEX_URL=${PIP_EXTRA_INDEX_URL} ENV UV_EXTRA_INDEX_URL=${PIP_EXTRA_INDEX_URL} 
ENV UV_INDEX_STRATEGY="unsafe-best-match" ENV UV_LINK_MODE="copy" + +# Copy requirements files for installation +COPY requirements/common.txt requirements/common.txt +COPY requirements/cpu.txt requirements/cpu.txt + RUN --mount=type=cache,target=/root/.cache/uv \ - --mount=type=bind,src=requirements/common.txt,target=requirements/common.txt \ - --mount=type=bind,src=requirements/cpu.txt,target=requirements/cpu.txt \ uv pip install --upgrade pip && \ uv pip install -r requirements/cpu.txt @@ -88,6 +93,12 @@ ARG GIT_REPO_CHECK=0 # Support for building with non-AVX512 vLLM: docker build --build-arg VLLM_CPU_DISABLE_AVX512="true" ... ARG VLLM_CPU_DISABLE_AVX512=0 ENV VLLM_CPU_DISABLE_AVX512=${VLLM_CPU_DISABLE_AVX512} +# Support for cross-compilation with AVX2 ISA: docker build --build-arg VLLM_CPU_AVX2="1" ... +ARG VLLM_CPU_AVX2=0 +ENV VLLM_CPU_AVX2=${VLLM_CPU_AVX2} +# Support for cross-compilation with AVX512 ISA: docker build --build-arg VLLM_CPU_AVX512="1" ... +ARG VLLM_CPU_AVX512=0 +ENV VLLM_CPU_AVX512=${VLLM_CPU_AVX512} # Support for building with AVX512BF16 ISA: docker build --build-arg VLLM_CPU_AVX512BF16="true" ... ARG VLLM_CPU_AVX512BF16=0 ENV VLLM_CPU_AVX512BF16=${VLLM_CPU_AVX512BF16} @@ -100,18 +111,19 @@ ENV VLLM_CPU_AMXBF16=${VLLM_CPU_AMXBF16} WORKDIR /workspace/vllm +# Copy build requirements +COPY requirements/cpu-build.txt requirements/build.txt + RUN --mount=type=cache,target=/root/.cache/uv \ - --mount=type=bind,src=requirements/cpu-build.txt,target=requirements/build.txt \ uv pip install -r requirements/build.txt COPY . . 
-RUN --mount=type=bind,source=.git,target=.git \ - if [ "$GIT_REPO_CHECK" != 0 ]; then bash tools/check_repo.sh ; fi + +RUN if [ "$GIT_REPO_CHECK" != 0 ]; then bash tools/check_repo.sh ; fi RUN --mount=type=cache,target=/root/.cache/uv \ --mount=type=cache,target=/root/.cache/ccache \ --mount=type=cache,target=/workspace/vllm/.deps,sharing=locked \ - --mount=type=bind,source=.git,target=.git \ VLLM_TARGET_DEVICE=cpu python3 setup.py bdist_wheel --dist-dir=dist --py-limited-api=cp38 ######################### TEST DEPS ######################### @@ -119,9 +131,11 @@ FROM base AS vllm-test-deps WORKDIR /workspace/vllm +# Copy test requirements +COPY requirements/test.in requirements/cpu-test.in + # TODO: Update to 2.9.0 when there is a new build for intel_extension_for_pytorch for that version -RUN --mount=type=bind,src=requirements/test.in,target=requirements/test.in \ - cp requirements/test.in requirements/cpu-test.in && \ +RUN \ sed -i '/mamba_ssm/d' requirements/cpu-test.in && \ remove_packages_not_supported_on_aarch64() { \ case "$(uname -m)" in \ @@ -200,4 +214,29 @@ RUN --mount=type=cache,target=/root/.cache/uv \ --mount=type=bind,from=vllm-build,src=/workspace/vllm/dist,target=dist \ uv pip install dist/*.whl +# Add labels to document build configuration +LABEL org.opencontainers.image.title="vLLM CPU" +LABEL org.opencontainers.image.description="vLLM inference engine for CPU platforms" +LABEL org.opencontainers.image.vendor="vLLM Project" +LABEL org.opencontainers.image.source="https://github.com/vllm-project/vllm" + +# Build configuration labels +ARG TARGETARCH +ARG VLLM_CPU_DISABLE_AVX512 +ARG VLLM_CPU_AVX2 +ARG VLLM_CPU_AVX512 +ARG VLLM_CPU_AVX512BF16 +ARG VLLM_CPU_AVX512VNNI +ARG VLLM_CPU_AMXBF16 +ARG PYTHON_VERSION + +LABEL ai.vllm.build.target-arch="${TARGETARCH}" +LABEL ai.vllm.build.cpu-disable-avx512="${VLLM_CPU_DISABLE_AVX512:-false}" +LABEL ai.vllm.build.cpu-avx2="${VLLM_CPU_AVX2:-false}" +LABEL ai.vllm.build.cpu-avx512="${VLLM_CPU_AVX512:-false}" 
+LABEL ai.vllm.build.cpu-avx512bf16="${VLLM_CPU_AVX512BF16:-false}" +LABEL ai.vllm.build.cpu-avx512vnni="${VLLM_CPU_AVX512VNNI:-false}" +LABEL ai.vllm.build.cpu-amxbf16="${VLLM_CPU_AMXBF16:-false}" +LABEL ai.vllm.build.python-version="${PYTHON_VERSION:-3.12}" + ENTRYPOINT ["vllm", "serve"] diff --git a/docs/getting_started/installation/cpu.x86.inc.md b/docs/getting_started/installation/cpu.x86.inc.md index 013750bc5..16a010e04 100644 --- a/docs/getting_started/installation/cpu.x86.inc.md +++ b/docs/getting_started/installation/cpu.x86.inc.md @@ -164,21 +164,76 @@ uv pip install dist/*.whl [https://gallery.ecr.aws/q9t5s3a7/vllm-cpu-release-repo](https://gallery.ecr.aws/q9t5s3a7/vllm-cpu-release-repo) !!! warning - If deploying the pre-built images on machines without `avx512f`, `avx512_bf16`, or `avx512_vnni` support, an `Illegal instruction` error may be raised. It is recommended to build images for these machines with the appropriate build arguments (e.g., `--build-arg VLLM_CPU_DISABLE_AVX512=true`, `--build-arg VLLM_CPU_AVX512BF16=false`, or `--build-arg VLLM_CPU_AVX512VNNI=false`) to disable unsupported features. Please note that without `avx512f`, AVX2 will be used and this version is not recommended because it only has basic feature support. + If deploying the pre-built images on machines without `avx512f`, `avx512_bf16`, or `avx512_vnni` support, an `Illegal instruction` error may be raised. See the build-image-from-source section below for build arguments to match your target CPU capabilities. # --8<-- [end:pre-built-images] # --8<-- [start:build-image-from-source] +## Building for your target CPU + +vLLM supports building Docker images for x86 CPU platforms with automatic instruction set detection. 
+ +### Basic build command + ```bash docker build -f docker/Dockerfile.cpu \ - --build-arg VLLM_CPU_AVX512BF16=false (default)|true \ - --build-arg VLLM_CPU_AVX512VNNI=false (default)|true \ - --build-arg VLLM_CPU_AMXBF16=false|true (default) \ - --build-arg VLLM_CPU_DISABLE_AVX512=false (default)|true \ + --build-arg VLLM_CPU_DISABLE_AVX512=<true|false> \ + --build-arg VLLM_CPU_AVX2=<true|false> \ + --build-arg VLLM_CPU_AVX512=<true|false> \ + --build-arg VLLM_CPU_AVX512BF16=<true|false> \ + --build-arg VLLM_CPU_AVX512VNNI=<true|false> \ + --build-arg VLLM_CPU_AMXBF16=<true|false> \ --tag vllm-cpu-env \ --target vllm-openai . +``` -# Launching OpenAI server +!!! note "Instruction set auto-detection" + By default, vLLM will auto-detect CPU instruction sets (AVX512, AVX2, etc.) from the build system's CPU flags. Build arguments like `VLLM_CPU_AVX2`, `VLLM_CPU_AVX512`, `VLLM_CPU_AVX512BF16`, `VLLM_CPU_AVX512VNNI`, and `VLLM_CPU_AMXBF16` are primarily used for **cross-compilation** or for building container images on systems that don't have the target platform's ISA: + + - Set `VLLM_CPU_{ISA}=true` to force-enable an instruction set (for cross-compilation to target platforms with that ISA) + - Set `VLLM_CPU_{ISA}=false` to rely on auto-detection + - When an ISA build arg is set to `true`, vLLM will build with that instruction set regardless of the build system's CPU capabilities + +### Build examples + +**Example 1: Auto-detection (native build)** + +Build on a machine with the same CPU as your target deployment: + +```bash +# Auto-detects all CPU features from the build system +docker build -f docker/Dockerfile.cpu \ + --tag vllm-cpu-env \ + --target vllm-openai . +``` + +**Example 2: Cross-compilation for AVX512 deployment** + +Build an AVX512 image on any x86_64 system (even without AVX512): + +```bash +docker build -f docker/Dockerfile.cpu \ + --build-arg VLLM_CPU_AVX512=true \ + --build-arg VLLM_CPU_AVX512BF16=true \ + --build-arg VLLM_CPU_AVX512VNNI=true \ + --tag vllm-cpu-avx512 \ + --target vllm-openai . 
+``` + +**Example 3: Cross-compilation for AVX2 deployment** + +Build an AVX2 image for older CPUs: + +```bash +docker build -f docker/Dockerfile.cpu \ + --build-arg VLLM_CPU_AVX2=true \ + --tag vllm-cpu-avx2 \ + --target vllm-openai . +``` + +## Launching the OpenAI server + +```bash docker run --rm \ --security-opt seccomp=unconfined \ --cap-add SYS_NICE \