[perf bench] H200 development (#9768)

Signed-off-by: simon-mo <simon.mo@hey.com>
This commit is contained in:
Simon Mo
2024-11-20 11:06:56 -08:00
committed by GitHub
parent 772a66732d
commit 5f1d6af2b6
3 changed files with 32 additions and 7 deletions

View File

@@ -6,6 +6,7 @@
# Do not set -e, as the mixtral 8x22B model tends to crash occasionally
# and we still want to see other benchmarking results even when mixtral crashes.
set -x
set -o pipefail
check_gpus() {
@@ -85,11 +86,7 @@ kill_gpu_processes() {
ps -aux
lsof -t -i:8000 | xargs -r kill -9
pkill -f pt_main_thread
# this line doesn't work now
# ps aux | grep python | grep openai | awk '{print $2}' | xargs -r kill -9
pkill -f python3
pkill -f /usr/bin/python3
pgrep python3 | xargs -r kill -9
# wait until GPU memory usage smaller than 1GB
@@ -289,7 +286,7 @@ run_serving_tests() {
# run the server
echo "Running test case $test_name"
echo "Server command: $server_command"
eval "$server_command" &
bash -c "$server_command" &
server_pid=$!
# wait until the server is alive
@@ -322,7 +319,7 @@ run_serving_tests() {
echo "Running test case $test_name with qps $qps"
echo "Client command: $client_command"
eval "$client_command"
bash -c "$client_command"
# record the benchmarking commands
jq_output=$(jq -n \