# Patch summary (preamble text before the first "diff --git" header; not part of any hunk).
#
# Purpose: let the CPU build opt in to the AVX512-BF16 and AVX512-VNNI ISA
# extensions through environment variables / Docker build args.
#
# .buildkite/release-pipeline.yaml
#   - The CPU release image `docker build` command additionally passes
#     `--build-arg VLLM_CPU_AVX512BF16=true --build-arg VLLM_CPU_AVX512VNNI=true`.
#
# cmake/cpu_extension.cmake
#   - ENABLE_AVX512BF16 is now assigned the *value* of $ENV{VLLM_CPU_AVX512BF16}
#     instead of being forced ON whenever that variable is merely defined.
#   - ENABLE_AVX512VNNI is likewise assigned from $ENV{VLLM_CPU_AVX512VNNI}.
#   - In the AVX512 branch, VNNI is enabled when `avx512_vnni` is found via
#     find_isa() OR ENABLE_AVX512VNNI is truthy, and only if the compiler ID is
#     "GNU" with version >= 12.3; in that case `-mavx512vnni` is appended to
#     CXX_COMPILE_FLAGS. Otherwise ENABLE_AVX512VNNI is set OFF and a WARNING is
#     printed.
#   - A STATUS message listing ${VLLM_EXT_SRC} is printed before the extension
#     targets are defined.
#
# docker/Dockerfile.cpu
#   - The header comment documents two new build arguments,
#     VLLM_CPU_AVX512BF16 and VLLM_CPU_AVX512VNNI.
#   - ARG VLLM_CPU_DISABLE_AVX512 gains a default of 0; new ARG/ENV pairs for
#     VLLM_CPU_AVX512BF16 and VLLM_CPU_AVX512VNNI are added, both defaulting to 0.
#
# NOTE(review): the Dockerfile header comment documents the defaults as `false`
#   while the ARG defaults are `0`. Both read as false in a CMake if(), but
#   confirm that no other consumer compares against the literal string.
# NOTE(review): a host CPU without avx512_vnni now emits message(WARNING ...)
#   on every configure; confirm that WARNING (rather than STATUS) is intended.
# NOTE(review): this copy of the patch has its line breaks and indentation
#   collapsed; the hunks presumably need their original line structure restored
#   before the patch can be applied.
diff --git a/.buildkite/release-pipeline.yaml b/.buildkite/release-pipeline.yaml index 55678b893..ee13e1aab 100644 --- a/.buildkite/release-pipeline.yaml +++ b/.buildkite/release-pipeline.yaml @@ -101,7 +101,7 @@ steps: queue: cpu_queue_postmerge commands: - "aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin public.ecr.aws/q9t5s3a7" - - "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg GIT_REPO_CHECK=1 --tag public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:$(buildkite-agent meta-data get release-version) --tag public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:latest --progress plain --target vllm-openai -f docker/Dockerfile.cpu ." + - "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg GIT_REPO_CHECK=1 --build-arg VLLM_CPU_AVX512BF16=true --build-arg VLLM_CPU_AVX512VNNI=true --tag public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:$(buildkite-agent meta-data get release-version) --tag public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:latest --progress plain --target vllm-openai -f docker/Dockerfile.cpu ." 
- "docker push public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:latest" - "docker push public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:$(buildkite-agent meta-data get release-version)" env: diff --git a/cmake/cpu_extension.cmake b/cmake/cpu_extension.cmake index 264c970ef..fc7291972 100644 --- a/cmake/cpu_extension.cmake +++ b/cmake/cpu_extension.cmake @@ -12,9 +12,8 @@ endif() # # Define environment variables for special configurations # -if(DEFINED ENV{VLLM_CPU_AVX512BF16}) - set(ENABLE_AVX512BF16 ON) -endif() +set(ENABLE_AVX512BF16 $ENV{VLLM_CPU_AVX512BF16}) +set(ENABLE_AVX512VNNI $ENV{VLLM_CPU_AVX512VNNI}) include_directories("${CMAKE_SOURCE_DIR}/csrc") @@ -107,10 +106,19 @@ if (AVX512_FOUND AND NOT AVX512_DISABLED) endif() find_isa(${CPUINFO} "avx512_vnni" AVX512VNNI_FOUND) - if (AVX512VNNI_FOUND) - list(APPEND CXX_COMPILE_FLAGS "-mavx512vnni") - set(ENABLE_AVX512VNNI ON) - endif() + if (AVX512VNNI_FOUND OR ENABLE_AVX512VNNI) + if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND + CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 12.3) + list(APPEND CXX_COMPILE_FLAGS "-mavx512vnni") + set(ENABLE_AVX512VNNI ON) + else() + set(ENABLE_AVX512VNNI OFF) + message(WARNING "Disable AVX512-VNNI ISA support, requires gcc/g++ >= 12.3") + endif() + else() + set(ENABLE_AVX512VNNI OFF) + message(WARNING "Disable AVX512-VNNI ISA support, no avx512_vnni found in local CPU flags." 
" If cross-compilation is required, please set env VLLM_CPU_AVX512VNNI=1.") + endif() elseif (AVX2_FOUND) list(APPEND CXX_COMPILE_FLAGS "-mavx2") @@ -257,6 +265,8 @@ elseif(POWER10_FOUND) ${VLLM_EXT_SRC}) endif() +message(STATUS "CPU extension source files: ${VLLM_EXT_SRC}") + # # Define extension targets # diff --git a/docker/Dockerfile.cpu b/docker/Dockerfile.cpu index 0a756ea72..5da2c9467 100644 --- a/docker/Dockerfile.cpu +++ b/docker/Dockerfile.cpu @@ -8,6 +8,8 @@ # Build arguments: # PYTHON_VERSION=3.12 (default)|3.11|3.10|3.9 # VLLM_CPU_DISABLE_AVX512=false (default)|true +# VLLM_CPU_AVX512BF16=false (default)|true +# VLLM_CPU_AVX512VNNI=false (default)|true # ######################### BASE IMAGE ######################### @@ -60,8 +62,14 @@ FROM base AS vllm-build ARG GIT_REPO_CHECK=0 # Support for building with non-AVX512 vLLM: docker build --build-arg VLLM_CPU_DISABLE_AVX512="true" ... -ARG VLLM_CPU_DISABLE_AVX512 +ARG VLLM_CPU_DISABLE_AVX512=0 ENV VLLM_CPU_DISABLE_AVX512=${VLLM_CPU_DISABLE_AVX512} +# Support for building with AVX512BF16 ISA: docker build --build-arg VLLM_CPU_AVX512BF16="true" ... +ARG VLLM_CPU_AVX512BF16=0 +ENV VLLM_CPU_AVX512BF16=${VLLM_CPU_AVX512BF16} +# Support for building with AVX512VNNI ISA: docker build --build-arg VLLM_CPU_AVX512VNNI="true" ... +ARG VLLM_CPU_AVX512VNNI=0 +ENV VLLM_CPU_AVX512VNNI=${VLLM_CPU_AVX512VNNI} WORKDIR /workspace/vllm