[UX] Improve UX of CPU backend (#36968)

Signed-off-by: jiang1.li <jiang1.li@intel.com>
Signed-off-by: Li, Jiang <bigpyj64@gmail.com>
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
This commit is contained in:
Li, Jiang
2026-03-14 09:27:29 +08:00
committed by GitHub
parent f680dc1b39
commit 092ace9e3a
10 changed files with 174 additions and 118 deletions

View File

@@ -21,6 +21,20 @@ steps:
pytest -x -v -s tests/kernels/moe/test_cpu_fused_moe.py
pytest -x -v -s tests/kernels/test_onednn.py"
- label: CPU-Compatibility Tests
depends_on: []
soft_fail: true
device: intel_cpu
no_plugin: true
source_file_dependencies:
- cmake/cpu_extension.cmake
- setup.py
- vllm/platforms/cpu.py
commands:
- |
bash .buildkite/scripts/hardware_ci/run-cpu-test.sh 20m "
bash .buildkite/scripts/hardware_ci/run-cpu-compatibility-test.sh"
- label: CPU-Language Generation and Pooling Model Tests
depends_on: []
soft_fail: true

View File

@@ -25,9 +25,7 @@ fi
docker build --file docker/Dockerfile.cpu \
--build-arg max_jobs=16 \
--build-arg buildkite_commit="$BUILDKITE_COMMIT" \
--build-arg VLLM_CPU_AVX512BF16=true \
--build-arg VLLM_CPU_AVX512VNNI=true \
--build-arg VLLM_CPU_AMXBF16=true \
--build-arg VLLM_CPU_X86=true \
--tag "$REGISTRY"/"$REPO":"$BUILDKITE_COMMIT"-cpu \
--target vllm-test \
--progress plain .

View File

@@ -83,7 +83,7 @@ steps:
agents:
queue: cpu_queue_postmerge
commands:
- "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg GIT_REPO_CHECK=1 --build-arg VLLM_CPU_AVX512BF16=true --build-arg VLLM_CPU_AVX512VNNI=true --build-arg VLLM_CPU_AMXBF16=true --tag vllm-ci:build-image --target vllm-build --progress plain -f docker/Dockerfile.cpu ."
- "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg GIT_REPO_CHECK=1 --build-arg VLLM_CPU_X86=true --tag vllm-ci:build-image --target vllm-build --progress plain -f docker/Dockerfile.cpu ."
- "mkdir artifacts"
- "docker run --rm -v $(pwd)/artifacts:/artifacts_host vllm-ci:build-image bash -c 'cp -r dist /artifacts_host && chmod -R a+rw /artifacts_host'"
- "bash .buildkite/scripts/upload-nightly-wheels.sh manylinux_2_35"
@@ -152,7 +152,7 @@ steps:
queue: cpu_queue_postmerge
commands:
- "aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin public.ecr.aws/q9t5s3a7"
- "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg GIT_REPO_CHECK=1 --build-arg VLLM_CPU_AVX512BF16=true --build-arg VLLM_CPU_AVX512VNNI=true --build-arg VLLM_CPU_AMXBF16=true --tag public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:$(buildkite-agent meta-data get release-version) --tag public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:latest --progress plain --target vllm-openai -f docker/Dockerfile.cpu ."
- "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg GIT_REPO_CHECK=1 --build-arg VLLM_CPU_X86=true --tag public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:$(buildkite-agent meta-data get release-version) --tag public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:latest --progress plain --target vllm-openai -f docker/Dockerfile.cpu ."
- "docker push public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:latest"
- "docker push public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:$(buildkite-agent meta-data get release-version)"
env:

View File

@@ -0,0 +1,65 @@
#!/bin/bash
# CPU-compatibility smoke test: runs vLLM offline inference under the Intel
# Software Development Emulator (SDE) to emulate older x86 ISA levels.
set -euox pipefail
export VLLM_CPU_KVCACHE_SPACE=1
export VLLM_CPU_CI_ENV=1
# Reduce sub-processes for acceleration
export TORCH_COMPILE_DISABLE=1
export VLLM_ENABLE_V1_MULTIPROCESSING=0
# Pinned SDE release archive and its expected SHA-256 digest.
SDE_ARCHIVE="sde-external-10.7.0-2026-02-18-lin.tar.xz"
SDE_CHECKSUM="CA3D4086DE4ACB3FAEDF9F57B541C6936B7D5E19AE2BF763B6EA933573A0A217"
wget "https://downloadmirror.intel.com/913594/${SDE_ARCHIVE}"
# Verify the download before extracting; sha256sum --check fails the script
# (set -e) on a digest mismatch.
echo "${SDE_CHECKSUM} ${SDE_ARCHIVE}" | sha256sum --check
mkdir -p sde
# Strip the top-level directory so the emulator binary lands at ./sde/sde64.
tar -xvf "./${SDE_ARCHIVE}" --strip-components=1 -C ./sde/
# Block until the background process $1 exits; on a non-zero exit status,
# dump the last lines of its log file $2 and return 1 (which aborts the
# script under 'set -e').
wait_for_pid_and_check_log() {
    local pid="$1"
    local log_file="$2"
    local exit_status=0

    if [ -z "$pid" ] || [ -z "$log_file" ]; then
        echo "Usage: wait_for_pid_and_check_log <PID> <LOG_FILE>"
        return 1
    fi

    echo "Waiting for process $pid to finish..."
    # 'wait' propagates the exit status of the waited-for process; capture it
    # with '||' so a failing test does not trip errexit before we can report.
    wait "$pid" || exit_status=$?

    if [ "$exit_status" -eq 0 ]; then
        echo "Process $pid finished with exit status $exit_status (Success)."
    else
        echo "Process $pid finished with exit status $exit_status (Failure)."
        echo "Process exited with a non-zero status."
        echo "--- Last few lines of log file: $log_file ---"
        tail -n 50 "$log_file"
        echo "---------------------------------------------"
        return 1 # Indicate failure based on exit status
    fi

    echo "No errors detected in log file and process exited successfully."
    return 0
}
# Launch the three ISA-compatibility smoke tests in parallel, each under an
# SDE CPU model, with stdout/stderr captured to a per-test log file.
# Test Sky Lake (AVX512F)
./sde/sde64 -skl -- python3 examples/basic/offline_inference/generate.py --model facebook/opt-125m --dtype bfloat16 > test_0.log 2>&1 &
PID_TEST_0=$!
# Test Cascade Lake (AVX512F + VNNI)
./sde/sde64 -clx -- python3 examples/basic/offline_inference/generate.py --model facebook/opt-125m --dtype bfloat16 > test_1.log 2>&1 &
PID_TEST_1=$!
# Test Cooper Lake (AVX512F + VNNI + BF16)
./sde/sde64 -cpx -- python3 examples/basic/offline_inference/generate.py --model facebook/opt-125m --dtype bfloat16 > test_2.log 2>&1 &
PID_TEST_2=$!
# Join each test in turn; a non-zero exit status dumps the log tail and,
# via 'set -e', fails the whole script.
wait_for_pid_and_check_log $PID_TEST_0 test_0.log
wait_for_pid_and_check_log $PID_TEST_1 test_1.log
wait_for_pid_and_check_log $PID_TEST_2 test_2.log

View File

@@ -102,11 +102,13 @@ if (CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64|amd64" OR ENABLE_X86_ISA)
"-mavx512f"
"-mavx512vl"
"-mavx512bw"
"-mavx512dq"
"-mavx512bf16"
"-mavx512vnni"
"-mavx512dq")
list(APPEND CXX_COMPILE_FLAGS_AVX512_AMX
${CXX_COMPILE_FLAGS_AVX512}
"-mamx-bf16"
"-mamx-tile")
"-mamx-tile"
"-mavx512bf16"
"-mavx512vnni")
list(APPEND CXX_COMPILE_FLAGS_AVX2
"-mavx2")
elseif (POWER9_FOUND OR POWER10_FOUND OR POWER11_FOUND)
@@ -314,7 +316,8 @@ endif()
# TODO: Refactor this
if (ENABLE_X86_ISA)
message(STATUS "CPU extension (AVX512) compile flags: ${CXX_COMPILE_FLAGS_AVX512}")
message(STATUS "CPU extension (AVX512F + BF16 + VNNI + AMX) compile flags: ${CXX_COMPILE_FLAGS_AVX512_AMX}")
message(STATUS "CPU extension (AVX512F) compile flags: ${CXX_COMPILE_FLAGS_AVX512}")
message(STATUS "CPU extension (AVX2) compile flags: ${CXX_COMPILE_FLAGS_AVX2}")
else()
message(STATUS "CPU extension compile flags: ${CXX_COMPILE_FLAGS}")
@@ -366,13 +369,15 @@ if(USE_ONEDNN)
endif()
if (ENABLE_X86_ISA)
set(VLLM_EXT_SRC_AVX512
set(VLLM_EXT_SRC_SGL
"csrc/cpu/sgl-kernels/gemm.cpp"
"csrc/cpu/sgl-kernels/gemm_int8.cpp"
"csrc/cpu/sgl-kernels/gemm_fp8.cpp"
"csrc/cpu/sgl-kernels/moe.cpp"
"csrc/cpu/sgl-kernels/moe_int8.cpp"
"csrc/cpu/sgl-kernels/moe_fp8.cpp"
"csrc/cpu/sgl-kernels/moe_fp8.cpp")
set(VLLM_EXT_SRC_AVX512
"csrc/cpu/shm.cpp"
"csrc/cpu/cpu_wna16.cpp"
"csrc/cpu/cpu_fused_moe.cpp"
@@ -398,31 +403,48 @@ if (ENABLE_X86_ISA)
"csrc/cpu/pos_encoding.cpp"
"csrc/moe/dynamic_4bit_int_moe_cpu.cpp")
message(STATUS "CPU extension (AVX512) source files: ${VLLM_EXT_SRC_AVX512}")
message(STATUS "CPU extension (AVX512F + BF16 + VNNI + AMX) source files: ${VLLM_EXT_SRC_AVX512} ${VLLM_EXT_SRC_SGL}")
message(STATUS "CPU extension (AVX512F) source files: ${VLLM_EXT_SRC_AVX512}")
message(STATUS "CPU extension (AVX2) source files: ${VLLM_EXT_SRC_AVX2}")
set(_C_LIBS numa dnnl_ext)
set(_C_AVX512_LIBS numa dnnl_ext)
set(_C_AVX2_LIBS numa)
# AMX + AVX512F + AVX512BF16 + AVX512VNNI
define_extension_target(
_C
DESTINATION vllm
LANGUAGE CXX
SOURCES ${VLLM_EXT_SRC_AVX512} ${VLLM_EXT_SRC_SGL}
LIBRARIES ${_C_LIBS}
COMPILE_FLAGS ${CXX_COMPILE_FLAGS_AVX512_AMX}
USE_SABI 3
WITH_SOABI
)
# For AMX kernels
target_compile_definitions(_C PRIVATE "-DCPU_CAPABILITY_AMXBF16")
# AVX512F
define_extension_target(
_C_AVX512
DESTINATION vllm
LANGUAGE CXX
SOURCES ${VLLM_EXT_SRC_AVX512}
LIBRARIES ${LIBS}
LIBRARIES ${_C_AVX512_LIBS}
COMPILE_FLAGS ${CXX_COMPILE_FLAGS_AVX512}
USE_SABI 3
WITH_SOABI
)
# For SGL kernels
target_compile_definitions(_C PRIVATE "-DCPU_CAPABILITY_AVX512")
# For AMX kernels
target_compile_definitions(_C PRIVATE "-DCPU_CAPABILITY_AMXBF16")
# AVX2
define_extension_target(
_C_AVX2
DESTINATION vllm
LANGUAGE CXX
SOURCES ${VLLM_EXT_SRC_AVX2}
LIBRARIES ${LIBS}
LIBRARIES ${_C_AVX2_LIBS}
COMPILE_FLAGS ${CXX_COMPILE_FLAGS_AVX2}
USE_SABI 3
WITH_SOABI

View File

@@ -14,12 +14,7 @@
#
# Build arguments:
# PYTHON_VERSION=3.13|3.12 (default)|3.11|3.10
# VLLM_CPU_DISABLE_AVX512=false (default)|true
# VLLM_CPU_AVX2=false (default)|true (for cross-compilation)
# VLLM_CPU_AVX512=false (default)|true (for cross-compilation)
# VLLM_CPU_AVX512BF16=false (default)|true (for cross-compilation)
# VLLM_CPU_AVX512VNNI=false (default)|true (for cross-compilation)
# VLLM_CPU_AMXBF16=false (default)|true (for cross-compilation)
# VLLM_CPU_X86=false (default)|true (for cross-compilation)
# VLLM_CPU_ARM_BF16=false (default)|true (for cross-compilation)
#
@@ -36,7 +31,7 @@ RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
--mount=type=cache,target=/var/lib/apt,sharing=locked \
apt-get update -y \
&& apt-get install -y --no-install-recommends sudo ccache git curl wget ca-certificates \
gcc-12 g++-12 libtcmalloc-minimal4 libnuma-dev ffmpeg libsm6 libxext6 libgl1 jq lsof \
gcc-12 g++-12 libtcmalloc-minimal4 libnuma-dev ffmpeg libsm6 libxext6 libgl1 jq lsof xz-utils \
&& update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-12 10 --slave /usr/bin/g++ g++ /usr/bin/g++-12 \
&& curl -LsSf https://astral.sh/uv/install.sh | sh
@@ -91,24 +86,9 @@ ARG max_jobs=32
ENV MAX_JOBS=${max_jobs}
ARG GIT_REPO_CHECK=0
# Support for building with non-AVX512 vLLM: docker build --build-arg VLLM_CPU_DISABLE_AVX512="true" ...
ARG VLLM_CPU_DISABLE_AVX512=0
ENV VLLM_CPU_DISABLE_AVX512=${VLLM_CPU_DISABLE_AVX512}
# Support for cross-compilation with AVX2 ISA: docker build --build-arg VLLM_CPU_AVX2="1" ...
ARG VLLM_CPU_AVX2=0
ENV VLLM_CPU_AVX2=${VLLM_CPU_AVX2}
# Support for cross-compilation with AVX512 ISA: docker build --build-arg VLLM_CPU_AVX512="1" ...
ARG VLLM_CPU_AVX512=0
ENV VLLM_CPU_AVX512=${VLLM_CPU_AVX512}
# Support for building with AVX512BF16 ISA: docker build --build-arg VLLM_CPU_AVX512BF16="true" ...
ARG VLLM_CPU_AVX512BF16=0
ENV VLLM_CPU_AVX512BF16=${VLLM_CPU_AVX512BF16}
# Support for building with AVX512VNNI ISA: docker build --build-arg VLLM_CPU_AVX512VNNI="true" ...
ARG VLLM_CPU_AVX512VNNI=0
ENV VLLM_CPU_AVX512VNNI=${VLLM_CPU_AVX512VNNI}
# Support for building with AMXBF16 ISA: docker build --build-arg VLLM_CPU_AMXBF16="true" ...
ARG VLLM_CPU_AMXBF16=1
ENV VLLM_CPU_AMXBF16=${VLLM_CPU_AMXBF16}
# Support for cross-compilation with x86 ISA including AVX2 and AVX512: docker build --build-arg VLLM_CPU_X86="true" ...
ARG VLLM_CPU_X86=0
ENV VLLM_CPU_X86=${VLLM_CPU_X86}
# Support for cross-compilation with ARM BF16 ISA: docker build --build-arg VLLM_CPU_ARM_BF16="true" ...
ARG VLLM_CPU_ARM_BF16=0
ENV VLLM_CPU_ARM_BF16=${VLLM_CPU_ARM_BF16}
@@ -116,7 +96,7 @@ ENV VLLM_CPU_ARM_BF16=${VLLM_CPU_ARM_BF16}
WORKDIR /vllm-workspace
# Validate build arguments - prevent mixing incompatible ISA flags
RUN if [ "$TARGETARCH" = "arm64" ] && { [ "$VLLM_CPU_AVX2" != "0" ] || [ "$VLLM_CPU_AVX512" != "0" ] || [ "$VLLM_CPU_AVX512BF16" != "0" ] || [ "$VLLM_CPU_AVX512VNNI" != "0" ]; }; then \
RUN if [ "$TARGETARCH" = "arm64" ] && [ "$VLLM_CPU_X86" != "0" ]; then \
echo "ERROR: Cannot use x86-specific ISA flags (AVX2, AVX512, etc.) when building for ARM64 (--platform=linux/arm64)"; \
exit 1; \
fi && \
@@ -174,7 +154,7 @@ WORKDIR /vllm-workspace
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
--mount=type=cache,target=/var/lib/apt,sharing=locked \
apt-get install -y --no-install-recommends vim numactl xz-utils make clangd-14
apt-get install -y --no-install-recommends vim numactl make clangd-14
RUN ln -s /usr/bin/clangd-14 /usr/bin/clangd
@@ -232,22 +212,12 @@ LABEL org.opencontainers.image.source="https://github.com/vllm-project/vllm"
# Build configuration labels
ARG TARGETARCH
ARG VLLM_CPU_DISABLE_AVX512
ARG VLLM_CPU_AVX2
ARG VLLM_CPU_AVX512
ARG VLLM_CPU_AVX512BF16
ARG VLLM_CPU_AVX512VNNI
ARG VLLM_CPU_AMXBF16
ARG VLLM_CPU_X86
ARG VLLM_CPU_ARM_BF16
ARG PYTHON_VERSION
LABEL ai.vllm.build.target-arch="${TARGETARCH}"
LABEL ai.vllm.build.cpu-disable-avx512="${VLLM_CPU_DISABLE_AVX512:-false}"
LABEL ai.vllm.build.cpu-avx2="${VLLM_CPU_AVX2:-false}"
LABEL ai.vllm.build.cpu-avx512="${VLLM_CPU_AVX512:-false}"
LABEL ai.vllm.build.cpu-avx512bf16="${VLLM_CPU_AVX512BF16:-false}"
LABEL ai.vllm.build.cpu-avx512vnni="${VLLM_CPU_AVX512VNNI:-false}"
LABEL ai.vllm.build.cpu-amxbf16="${VLLM_CPU_AMXBF16:-false}"
LABEL ai.vllm.build.cpu-x86="${VLLM_CPU_X86:-false}"
LABEL ai.vllm.build.cpu-arm-bf16="${VLLM_CPU_ARM_BF16:-false}"
LABEL ai.vllm.build.python-version="${PYTHON_VERSION:-3.12}"

View File

@@ -7,7 +7,7 @@ vLLM supports basic model inferencing and serving on x86 CPU platform, with data
--8<-- [start:requirements]
- OS: Linux
- CPU flags: `avx512f` (Recommended), `avx512_bf16` (Optional), `avx512_vnni` (Optional)
- CPU flags: `avx512f` (Recommended), `avx2` (Limited features)
!!! tip
Use `lscpu` to check the CPU flags.
@@ -18,7 +18,7 @@ vLLM supports basic model inferencing and serving on x86 CPU platform, with data
--8<-- [end:set-up-using-python]
--8<-- [start:pre-built-wheels]
Pre-built vLLM wheels for x86 with AVX512 are available since version 0.13.0. To install release wheels:
Pre-built vLLM wheels for x86 with AVX512/AVX2 are available since version 0.17.0. To install release wheels:
```bash
export VLLM_VERSION=$(curl -s https://api.github.com/repos/vllm-project/vllm/releases/latest | jq -r .tag_name | sed 's/^v//')
@@ -108,13 +108,13 @@ VLLM_TARGET_DEVICE=cpu uv pip install . --no-build-isolation
If you want to develop vLLM, install it in editable mode instead.
```bash
VLLM_TARGET_DEVICE=cpu uv pip install -e . --no-build-isolation
VLLM_TARGET_DEVICE=cpu python3 setup.py develop
```
Optionally, build a portable wheel which you can then install elsewhere:
```bash
VLLM_TARGET_DEVICE=cpu uv build --wheel
VLLM_TARGET_DEVICE=cpu uv build --wheel --no-build-isolation
```
```bash
@@ -185,12 +185,9 @@ docker run \
-v ~/.cache/huggingface:/root/.cache/huggingface \
-p 8000:8000 \
--env "HF_TOKEN=<secret>" \
vllm/vllm-openai-cpu:latest-x86_64 <args...>
vllm/vllm-openai-cpu:latest-x86_64 <args...>
```
!!! warning
If deploying the pre-built images on machines without `avx512f`, `avx512_bf16`, or `avx512_vnni` support, an `Illegal instruction` error may be raised. See the build-image-from-source section below for build arguments to match your target CPU capabilities.
--8<-- [end:pre-built-images]
--8<-- [start:build-image-from-source]
@@ -198,50 +195,11 @@ vllm/vllm-openai-cpu:latest-x86_64 <args...>
```bash
docker build -f docker/Dockerfile.cpu \
--build-arg VLLM_CPU_DISABLE_AVX512=<false (default)|true> \
--build-arg VLLM_CPU_AVX2=<false (default)|true> \
--build-arg VLLM_CPU_AVX512=<false (default)|true> \
--build-arg VLLM_CPU_AVX512BF16=<false (default)|true> \
--build-arg VLLM_CPU_AVX512VNNI=<false (default)|true> \
--build-arg VLLM_CPU_AMXBF16=<false|true (default)> \
--build-arg VLLM_CPU_X86=<false (default)|true> \ # For cross-compilation
--tag vllm-cpu-env \
--target vllm-openai .
```
!!! note "Auto-detection by default"
By default, CPU instruction sets (AVX512, AVX2, etc.) are automatically detected from the build system's CPU flags. Build arguments like `VLLM_CPU_AVX2`, `VLLM_CPU_AVX512`, `VLLM_CPU_AVX512BF16`, `VLLM_CPU_AVX512VNNI`, and `VLLM_CPU_AMXBF16` are used for cross-compilation:
- `VLLM_CPU_{ISA}=true` - Force-enable the instruction set (build with ISA regardless of build system capabilities)
- `VLLM_CPU_{ISA}=false` - Rely on auto-detection (default)
##### Examples
###### Auto-detection build (default)
```bash
docker build -f docker/Dockerfile.cpu --tag vllm-cpu-env --target vllm-openai .
```
###### Cross-compile for AVX512
```bash
docker build -f docker/Dockerfile.cpu \
--build-arg VLLM_CPU_AVX512=true \
--build-arg VLLM_CPU_AVX512BF16=true \
--build-arg VLLM_CPU_AVX512VNNI=true \
--tag vllm-cpu-avx512 \
--target vllm-openai .
```
###### Cross-compile for AVX2
```bash
docker build -f docker/Dockerfile.cpu \
--build-arg VLLM_CPU_AVX2=true \
--tag vllm-cpu-avx2 \
--target vllm-openai .
```
#### Launching the OpenAI server
```bash

View File

@@ -920,6 +920,7 @@ if _is_cpu():
if platform.machine() in ("x86_64", "AMD64"):
ext_modules.append(CMakeExtension(name="vllm._C"))
ext_modules.append(CMakeExtension(name="vllm._C_AVX512"))
ext_modules.append(CMakeExtension(name="vllm._C_AVX2"))
else:
ext_modules.append(CMakeExtension(name="vllm._C"))

View File

@@ -252,6 +252,8 @@ class CpuPlatform(Platform):
if vllm_config.lora_config is not None:
compilation_config.mode = CompilationMode.NONE
vllm_config.profiler_config.torch_profiler_dump_cuda_time_total = False
assert vllm_config.device_config.device_type == "cpu"
#
@@ -470,21 +472,32 @@ class CpuPlatform(Platform):
@classmethod
def import_kernels(cls) -> None:
if Platform.get_cpu_architecture() in (CpuArchEnum.X86,):
if torch._C._cpu._is_avx512_supported():
try:
import vllm._C # noqa: F401
except ImportError as e:
logger.warning("Failed to import from vllm._C: %r", e)
# Note: The lib name is _C_AVX2/AVX512, but the module name is _C.
# This will cause an exception "dynamic module does not define
# module export function". But the library is imported
# successfully. So ignore the exception for now, until we find
# a solution.
ignored_msg = "dynamic module does not define module export function"
if torch.cpu._is_avx512_supported():
if torch.cpu._is_avx512_bf16_supported():
try:
import vllm._C # noqa: F401
except ImportError as e:
logger.warning("Failed to import from vllm._C: %r", e)
else:
try:
import vllm._C_AVX512 # noqa: F401
except ImportError as e:
if ignored_msg not in e.msg:
logger.warning(
"Failed to import from vllm._C_AVX512: %r", e
)
else:
# Note: The lib name is _C_AVX2, but the module name is _C.
# This will cause an exception "dynamic module does not define
# module export function". But the library is imported
# successfully. So ignore the exception for now, until we find
# a solution.
try:
import vllm._C_AVX2 # noqa: F401
except ImportError as e:
logger.warning("Failed to import from vllm._C_AVX2: %r", e)
if ignored_msg not in e.msg:
logger.warning("Failed to import from vllm._C_AVX2: %r", e)
else:
try:
import vllm._C # noqa: F401

View File

@@ -52,6 +52,21 @@ class CPUWorker(Worker):
)
def init_device(self):
# Check whether critical libraries are loaded
def check_preloaded_libs(name: str):
ld_preload_list = os.environ.get("LD_PRELOAD", "")
if name not in ld_preload_list:
raise RuntimeError(
f"{name} is not found in LD_PRELOAD. "
"Please follow the section `set LD_PRELOAD` in "
"https://docs.vllm.ai/en/latest/getting_started/installation/cpu/ "
"to setup required pre-loaded libraries."
)
check_preloaded_libs("libtcmalloc")
if current_platform.get_cpu_architecture() == CpuArchEnum.X86:
check_preloaded_libs("libiomp")
# Setup OpenMP threads affinity.
omp_cpuids = envs.VLLM_CPU_OMP_THREADS_BIND
# Under numa binding some cores reserved for kv transfer in nixl_connector.py