#!/bin/bash

# This script builds the XPU docker image and runs the offline inference
# examples and a subset of the unit tests inside the container.
# It serves as a sanity check for compilation and basic model usage.
#
# Environment variables read by this script:
#   BUILDKITE_COMMIT  - used to tag the image and name the container
#   HF_TOKEN          - forwarded into the container
#   ZE_AFFINITY_MASK  - forwarded into the container

# -e: abort on the first failing command; -x: trace every command.
set -ex

# Image tag and container name are both derived from the commit under test.
image_name="xpu/vllm-ci:${BUILDKITE_COMMIT}"
# A 10-character random alphanumeric suffix keeps container names distinct
# when the same commit is run more than once. LC_ALL=C makes tr treat the
# random input as plain bytes regardless of the caller's locale.
container_name="xpu_${BUILDKITE_COMMIT}_$(LC_ALL=C tr -dc A-Za-z0-9 < /dev/urandom | head -c 10; echo)"

# Try building the docker image from the XPU Dockerfile, using the current
# directory as the build context.
docker build -t "${image_name}" -f docker/Dockerfile.xpu .

# Setup cleanup
#######################################
# Best-effort teardown of everything this script created.
# Globals:   container_name (read), image_name (read)
# Arguments: none
# Returns:   always 0; each docker failure is deliberately ignored so that
#            one failed step does not prevent the remaining cleanup steps.
#######################################
remove_docker_container() {
  # Force-remove the test container (it may still be running or may not exist).
  docker rm -f "${container_name}" || true
  # Remove the image built for this commit.
  docker image rm -f "${image_name}" || true
  # Prune unused docker resources without prompting.
  docker system prune -f || true
}

# Run the cleanup on every exit path, including failures aborted by `set -e`.
trap remove_docker_container EXIT

# Run the image and test offline inference/tensor parallel
#
# The container gets /dev/dri, host networking and host IPC, and runs
# privileged. The image entrypoint is cleared so the inline bash script below
# is what runs. Inside the container `set -e` aborts on the first failing
# command, which makes `docker run` (and, via the outer `set -e`, this script)
# fail as well.
#
# NOTE(review): `set -x` is active, so the expanded HF_TOKEN value is written
# to the trace of this command; confirm the CI log masks it.
docker run \
    --device /dev/dri:/dev/dri \
    --net=host \
    --ipc=host \
    --privileged \
    -v /dev/dri/by-path:/dev/dri/by-path \
    --entrypoint="" \
    -e "HF_TOKEN=${HF_TOKEN}" \
    -e "ZE_AFFINITY_MASK=${ZE_AFFINITY_MASK}" \
    --name "${container_name}" \
    "${image_name}" \
    bash -c '
    set -e
    echo "$ZE_AFFINITY_MASK"
    pip install tblib==3.1.0

    # Offline inference example with several engine configurations.
    python3 examples/offline_inference/basic/generate.py --model facebook/opt-125m --block-size 64 --enforce-eager
    python3 examples/offline_inference/basic/generate.py --model facebook/opt-125m --block-size 64 -O3 -cc.cudagraph_mode=NONE
    python3 examples/offline_inference/basic/generate.py --model facebook/opt-125m --block-size 64 --enforce-eager -tp 2 --distributed-executor-backend ray
    python3 examples/offline_inference/basic/generate.py --model facebook/opt-125m --block-size 64 --enforce-eager -tp 2 --distributed-executor-backend mp
    python3 examples/offline_inference/basic/generate.py --model facebook/opt-125m --block-size 64 --enforce-eager --attention-backend=TRITON_ATTN
    python3 examples/offline_inference/basic/generate.py --model facebook/opt-125m --block-size 64 --enforce-eager --quantization fp8
    python3 examples/offline_inference/basic/generate.py --model superjob/Qwen3-4B-Instruct-2507-GPTQ-Int4 --block-size 64 --enforce-eager
    python3 examples/offline_inference/basic/generate.py --model ibm-research/PowerMoE-3b --block-size 64 --enforce-eager -tp 2
    python3 examples/offline_inference/basic/generate.py --model ibm-research/PowerMoE-3b --block-size 64 --enforce-eager -tp 2 --enable-expert-parallel

    # Selected v1 unit tests; individual test files are skipped via --ignore.
    cd tests
    pytest -v -s v1/core --ignore=v1/core/test_reset_prefix_cache_e2e.py
    pytest -v -s v1/engine
    pytest -v -s v1/sample --ignore=v1/sample/test_logprobs.py --ignore=v1/sample/test_logprobs_e2e.py
    pytest -v -s v1/worker --ignore=v1/worker/test_gpu_model_runner.py
    pytest -v -s v1/structured_output
    pytest -v -s v1/spec_decode --ignore=v1/spec_decode/test_max_len.py --ignore=v1/spec_decode/test_tree_attention.py --ignore=v1/spec_decode/test_speculators_eagle3.py --ignore=v1/spec_decode/test_acceptance_length.py
    pytest -v -s v1/kv_connector/unit --ignore=v1/kv_connector/unit/test_multi_connector.py --ignore=v1/kv_connector/unit/test_nixl_connector.py --ignore=v1/kv_connector/unit/test_example_connector.py --ignore=v1/kv_connector/unit/test_lmcache_integration.py
    pytest -v -s v1/test_serial_utils.py
'