#!/bin/bash
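# CI smoke test for vLLM parallelism: serve Llama-3.2-3B-Instruct first with
# pipeline+tensor parallelism, then with data+tensor parallelism, benchmark a
# handful of prompts against each server, and fail if any request fails.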
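# Strict mode: exit on any error (-e) or unset variable (-u), trace commands
# (-x), and fail a pipeline if any stage fails (-o pipefail).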
set -euox pipefail
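
# VLLM_CPU_CI_ENV presumably gates CPU-CI-specific behavior in vLLM; it is set
# to 0 so this run is treated as a regular (non-CPU-CI) environment.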
export VLLM_CPU_CI_ENV=0
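
# Lines echoed with a leading "--- " render as collapsible section headers in
# Buildkite logs.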
echo "--- PP+TP"
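# Start the server in the background with tensor parallelism 2 and pipeline
# parallelism 2, and remember its PID so it can be shut down later.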
vllm serve meta-llama/Llama-3.2-3B-Instruct -tp=2 -pp=2 &
server_pid=$!
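# Poll the OpenAI-compatible API until the server is ready (up to 10 minutes).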
timeout 600 bash -c "until curl localhost:8000/v1/models > /dev/null 2>&1; do sleep 1; done" || exit 1
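# Benchmark 20 random prompts against /v1/completions and save a JSON summary.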
vllm bench serve \
  --backend vllm \
  --dataset-name random \
  --model meta-llama/Llama-3.2-3B-Instruct \
  --num-prompts 20 \
  --result-dir ./test_results \
  --result-filename tp_pp.json \
  --save-result \
  --endpoint /v1/completions
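# Stop the server; wait reports the server's non-zero exit status after
# SIGTERM, so '|| true' keeps set -e from aborting the script here.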
kill -s SIGTERM $server_pid; wait $server_pid || true
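# The saved summary records the number of failed requests under '.failed'.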
failed_req=$(jq '.failed' ./test_results/tp_pp.json)
if [ "$failed_req" -ne 0 ]; then
|
|
echo "Some requests were failed!"
|
|
exit 1
|
|
fi
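
# Repeat the same flow with data parallelism 2 in place of pipeline parallelism.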
echo "--- DP+TP"
vllm serve meta-llama/Llama-3.2-3B-Instruct -tp=2 -dp=2 &
server_pid=$!
timeout 600 bash -c "until curl localhost:8000/v1/models > /dev/null 2>&1; do sleep 1; done" || exit 1
vllm bench serve \
  --backend vllm \
  --dataset-name random \
  --model meta-llama/Llama-3.2-3B-Instruct \
  --num-prompts 20 \
  --result-dir ./test_results \
  --result-filename dp_pp.json \
  --save-result \
  --endpoint /v1/completions
kill -s SIGTERM $server_pid; wait $server_pid || true
failed_req=$(jq '.failed' ./test_results/dp_pp.json)
if [ "$failed_req" -ne 0 ]; then
|
|
echo "Some requests were failed!"
|
|
exit 1
|
|
fi