#!/bin/bash
#
# Starts a `vllm serve` instance with an EC transfer connector configured in
# the "ec_both" role, waits for it to answer on its HTTP port, then runs
# `vllm bench serve` against it. The server is stopped when the script exits.
#
# Usage: <this script> [extra `vllm serve` arguments...]
#
# Environment overrides (each falls back to the default shown when unset or
# empty):
#   MODEL                   model passed to both `vllm serve` and the benchmark
#   PORT                    port the server listens on and the benchmark targets
#   GPU                     value exported as CUDA_VISIBLE_DEVICES for the server
#   NUM_PROMPTS             number of prompts the benchmark sends
#   EC_SHARED_STORAGE_PATH  directory handed to the connector; it is deleted
#                           and recreated at startup
#   TIMEOUT                 seconds to wait for the server to become ready
set -euo pipefail

MODEL="${MODEL:-Qwen/Qwen2.5-VL-3B-Instruct}"
PORT="${PORT:-8000}"
GPU="${GPU:-0}"
NUM_PROMPTS="${NUM_PROMPTS:-200}"
EC_SHARED_STORAGE_PATH="${EC_SHARED_STORAGE_PATH:-/tmp/ec_cache}"
TIMEOUT="${TIMEOUT:-600}"

# PID of the background server; stays empty until the server has been launched
# so the exit handler can tell whether there is anything to stop.
SERVER_PID=""

#######################################
# Stop the background server if one was started and is still running.
# Globals:   SERVER_PID (read)
# Outputs:   progress messages on stdout
#######################################
cleanup() {
  echo "Stopping server..."
  if [[ -n "${SERVER_PID:-}" ]] && kill -0 "$SERVER_PID" 2>/dev/null; then
    # Best effort: the process may exit between the liveness check and the
    # kill, so failures here are intentionally ignored.
    kill "$SERVER_PID" 2>/dev/null || true
    wait "$SERVER_PID" 2>/dev/null || true
  fi
  echo "Done."
}

# Run cleanup exactly once, on exit. A signal handler that returns lets the
# script resume after the interrupted command, so INT/TERM must exit
# explicitly (with the conventional 128+signal status); the EXIT trap then
# performs the cleanup.
trap cleanup EXIT
trap 'exit 130' INT
trap 'exit 143' TERM

#######################################
# Poll the server's /v1/models endpoint until it responds successfully.
# Globals:   PORT (read), TIMEOUT (read, seconds), SERVER_PID (read, optional)
# Outputs:   progress on stdout, errors on stderr
# Returns:   0 once the endpoint answers; 1 if TIMEOUT seconds elapse first or
#            the process recorded in SERVER_PID is no longer running
#######################################
wait_for_server() {
  local deadline=$((SECONDS + TIMEOUT))
  echo "Waiting for server on port $PORT..."
  while (( SECONDS < deadline )); do
    # --max-time keeps a single hung connection from overrunning the deadline.
    if curl -sf --max-time 5 "http://localhost:${PORT}/v1/models" \
      >/dev/null 2>&1; then
      echo "Server ready."
      return 0
    fi
    # Fail fast: if the server process has already exited there is no point
    # polling until the timeout. Skipped when no PID has been recorded.
    if [[ -n "${SERVER_PID:-}" ]] && ! kill -0 "$SERVER_PID" 2>/dev/null; then
      echo "ERROR: Server process $SERVER_PID exited before becoming ready" >&2
      return 1
    fi
    sleep 2
  done
  echo "ERROR: Server did not start within ${TIMEOUT}s" >&2
  return 1
}

# Start from an empty shared-storage directory. `:?` aborts if the variable is
# somehow empty, and `--` stops a path beginning with '-' being read as an
# option.
rm -rf -- "${EC_SHARED_STORAGE_PATH:?}"
mkdir -p -- "$EC_SHARED_STORAGE_PATH"

###############################################################################
# Start server with ec_both
###############################################################################

# Escape backslashes and double quotes so an unusual path cannot break the
# JSON document passed to --ec-transfer-config.
ec_storage_json=${EC_SHARED_STORAGE_PATH//\\/\\\\}
ec_storage_json=${ec_storage_json//\"/\\\"}

ec_transfer_config=$(
  cat <<EOF
{
  "ec_connector": "ECExampleConnector",
  "ec_role": "ec_both",
  "ec_connector_extra_config": {
    "shared_storage_path": "${ec_storage_json}"
  }
}
EOF
)

# Any arguments given to this script are forwarded to `vllm serve`.
CUDA_VISIBLE_DEVICES="$GPU" \
  vllm serve "$MODEL" \
  --port "$PORT" \
  --enforce-eager \
  --ec-transfer-config "$ec_transfer_config" \
  "$@" &

# Record the PID so the server can be stopped later, then block until the
# HTTP endpoint is reachable.
SERVER_PID=$!
wait_for_server

###############################################################################
# Benchmark -- dataset contains duplicate images, exercises cache hits
###############################################################################
echo "Running benchmark ($NUM_PROMPTS prompts)..."

# Arguments are kept in an array so each value stays a single word and the
# list can be read one option per line.
bench_args=(
  --model "$MODEL"
  --backend openai-chat
  --endpoint /v1/chat/completions
  --dataset-name hf
  --dataset-path lmarena-ai/VisionArena-Chat
  --seed 0
  --num-prompts "$NUM_PROMPTS"
  --port "$PORT"
)
vllm bench serve "${bench_args[@]}"

echo "Benchmark complete."