#!/bin/bash
# CI benchmark: serve meta-llama/Llama-3.2-3B-Instruct under two parallelism
# configurations (tensor+pipeline, tensor+data), run the serving benchmark
# against each, and fail the build if any benchmark request failed.
set -euox pipefail

export VLLM_CPU_CI_ENV=0

readonly MODEL="meta-llama/Llama-3.2-3B-Instruct"

server_pid=""

# Kill the background server on any exit path so a failed benchmark or jq
# check (which aborts via `set -e`) does not leak the `vllm serve` process.
cleanup() {
  if [[ -n "$server_pid" ]] && kill -0 "$server_pid" 2>/dev/null; then
    kill -s SIGTERM "$server_pid" || true
    wait "$server_pid" || true
  fi
}
trap cleanup EXIT

# Block until the OpenAI-compatible endpoint answers, or give up after 600s.
wait_for_server() {
  timeout 600 bash -c \
    "until curl localhost:8000/v1/models > /dev/null 2>&1; do sleep 1; done" \
    || exit 1
}

#######################################
# Run the serving benchmark against the currently running server, shut the
# server down, and verify that no request failed.
# Globals:   MODEL (read), server_pid (read, cleared)
# Arguments: $1 - result filename (JSON, written under ./test_results)
# Returns:   0 on success; exits 1 if any benchmark request failed
#######################################
run_benchmark() {
  local result_file=$1

  vllm bench serve \
    --backend vllm \
    --dataset-name random \
    --model "$MODEL" \
    --num-prompts 20 \
    --result-dir ./test_results \
    --result-filename "$result_file" \
    --save-result \
    --endpoint /v1/completions

  # Stop the server before inspecting results; its exit status is irrelevant.
  kill -s SIGTERM "$server_pid"
  wait "$server_pid" || true
  server_pid=""

  local failed_req
  failed_req=$(jq '.failed' "./test_results/$result_file")
  if [ "$failed_req" -ne 0 ]; then
    echo "Some requests were failed!"
    exit 1
  fi
}

echo "--- PP+TP"
vllm serve "$MODEL" -tp=2 -pp=2 &
server_pid=$!
wait_for_server
run_benchmark tp_pp.json

echo "--- DP+TP"
vllm serve "$MODEL" -tp=2 -dp=2 &
server_pid=$!
wait_for_server
run_benchmark dp_pp.json