2024-11-07 13:17:29 -05:00
|
|
|
#!/bin/bash
|
|
|
|
|
|
2024-04-02 13:07:30 +08:00
|
|
|
# This script builds the CPU Docker image and runs the offline inference inside the container.
|
|
|
|
|
# It serves as a sanity check for compilation and basic model usage.
|
2026-02-05 13:53:48 +08:00
|
|
|
# Strict mode: abort on errors, unset variables and failed pipeline stages;
# trace every command as it runs.
set -o errexit -o nounset -o xtrace -o pipefail
|
2024-04-02 13:07:30 +08:00
|
|
|
|
2024-11-12 18:07:32 +08:00
|
|
|
# Allow binding to different cores.
|
|
|
|
|
# CPU list given to `docker run --cpuset-cpus`; 48-95 unless the caller's
# environment already provides a non-empty CORE_RANGE.
: "${CORE_RANGE:=48-95}"
|
|
|
|
|
# Memory node given to `docker run --cpuset-mems` and used as the image-name
# suffix; 1 unless the caller's environment provides a non-empty NUMA_NODE.
: "${NUMA_NODE:=1}"
|
2026-02-05 13:53:48 +08:00
|
|
|
# Tag for the image built and run below; it embeds NUMA_NODE.
IMAGE_NAME="cpu-test-${NUMA_NODE}"
|
|
|
|
|
# First positional argument: the duration passed to `timeout` for the
# in-container run.
TIMEOUT_VAL="${1}"
|
|
|
|
|
# Second positional argument: shell text appended to the `bash -c` script that
# runs inside the container.
TEST_COMMAND="${2}"
|
2024-11-12 18:07:32 +08:00
|
|
|
|
2026-02-05 13:53:48 +08:00
|
|
|
# building the docker image
|
|
|
|
|
# Announce the build step in the job log.
printf '%s\n' "--- :docker: Building Docker image"
|
|
|
|
|
# Build the vllm-test stage of docker/Dockerfile.cpu from the current
# directory and tag it as $IMAGE_NAME; plain progress output is requested.
docker build \
  --progress plain \
  --tag "$IMAGE_NAME" \
  --target vllm-test \
  -f docker/Dockerfile.cpu \
  .
|
2025-03-28 16:36:31 +08:00
|
|
|
|
2024-07-27 04:50:10 +08:00
|
|
|
# Run the image, setting --shm-size=4g for tensor parallel.
|
2026-02-17 21:22:56 +09:00
|
|
|
# Start a throwaway container from $IMAGE_NAME, restricted to the configured
# CPUs and memory node, with the host Hugging Face cache mounted and HF_TOKEN
# forwarded from the caller's environment. Inside the container, `timeout`
# bounds a bash script that lists the installed packages and then runs
# $TEST_COMMAND, which is spliced into the script text verbatim.
docker run --rm \
  --cpuset-cpus="$CORE_RANGE" \
  --cpuset-mems="$NUMA_NODE" \
  -v ~/.cache/huggingface:/root/.cache/huggingface \
  --privileged=true \
  -e HF_TOKEN \
  -e VLLM_CPU_KVCACHE_SPACE=16 \
  -e VLLM_CPU_CI_ENV=1 \
  -e VLLM_CPU_SIM_MULTI_NUMA=1 \
  --shm-size=4g \
  "$IMAGE_NAME" \
  timeout "$TIMEOUT_VAL" \
  bash -c 'set -euox pipefail; echo "--- Print packages"; pip list; echo "--- Running tests"; '"${TEST_COMMAND}"
|