27 lines
781 B
Bash
27 lines
781 B
Bash
#!/bin/bash
#
# Runs a short `vllm bench serve` benchmark against `vllm serve` for two
# parallelism configurations in sequence:
#   1. "PP+TP": -tp=2 -pp=2
#   2. "DP+TP": -tp=2 -dp=2
#
# For each configuration the script starts the server in the background,
# waits (up to READY_TIMEOUT_SECS) for the models endpoint to answer, runs the
# benchmark, and then shuts the server down before moving on.

set -euox pipefail

readonly MODEL="meta-llama/Llama-3.2-3B-Instruct"
readonly MODELS_URL="localhost:8000/v1/models"
readonly READY_TIMEOUT_SECS=600

# PID of the currently running background server; empty when none is running.
server_pid=""

#######################################
# Sends SIGTERM to the background server (if any) and blocks until it exits.
# Also installed as the EXIT trap so a failed benchmark or readiness timeout
# does not leave a server behind.
# Globals:   server_pid (read, then cleared)
# Arguments: none
# Returns:   0 always
#######################################
stop_server() {
  if [[ -z "${server_pid}" ]]; then
    return 0
  fi
  # The process may already be gone; that is not an error here.
  kill -s SIGTERM "${server_pid}" 2>/dev/null || true
  # A process terminated by a signal reports a non-zero status; ignore it so
  # `set -e` does not abort the script during an intentional shutdown.
  wait "${server_pid}" || true
  server_pid=""
}
trap stop_server EXIT

#######################################
# Starts `vllm serve` with the given extra flags, waits for it to become
# ready, benchmarks it, and stops it again.
# Globals:   MODEL, MODELS_URL, READY_TIMEOUT_SECS (read), server_pid (written)
# Arguments: $1   - label printed as the "--- <label>" section header
#            $2.. - extra flags passed to `vllm serve`
# Returns:   exits the script with status 1 if the server is not ready in time
#######################################
run_benchmark() {
  local label=$1
  shift

  echo "--- ${label}"

  vllm serve "${MODEL}" "$@" &
  server_pid=$!

  # Poll the models endpoint once per second. The `kill -0` check ends the
  # wait early (best effort) when the server process has already died, instead
  # of polling a dead server until the timeout expires.
  timeout "${READY_TIMEOUT_SECS}" bash -c \
    'until curl "$1"; do kill -0 "$2" 2>/dev/null || exit 1; sleep 1; done' \
    _ "${MODELS_URL}" "${server_pid}" || exit 1

  vllm bench serve \
    --backend vllm \
    --dataset-name random \
    --model "${MODEL}" \
    --num-prompts 20 \
    --endpoint /v1/completions

  # Stop synchronously: every configuration is probed at the same address, so
  # the previous server must be fully gone before the next one is started.
  stop_server
}

run_benchmark "PP+TP" -tp=2 -pp=2
run_benchmark "DP+TP" -tp=2 -dp=2