Indicate compile mode in the benchmark results (#32990)
Signed-off-by: Huy Do <huydhn@gmail.com>
This commit is contained in:
@@ -393,6 +393,11 @@ run_serving_tests() {
|
||||
fi
|
||||
fi
|
||||
|
||||
# save the compilation mode and optimization level on the serving results
|
||||
# whenever they are set
|
||||
compilation_config_mode=$(echo "$server_params" | jq -r '."compilation_config.mode" // empty')
|
||||
optimization_level=$(echo "$server_params" | jq -r '.optimization_level // empty')
|
||||
|
||||
# iterate over different QPS
|
||||
for qps in $qps_list; do
|
||||
# remove the surrounding single quote from qps
|
||||
@@ -406,15 +411,15 @@ run_serving_tests() {
|
||||
for max_concurrency in $max_concurrency_list; do
|
||||
new_test_name=$test_name"_qps_"$qps"_concurrency_"$max_concurrency
|
||||
echo " new test name $new_test_name"
|
||||
# pass the tensor parallel size to the client so that it can be displayed
|
||||
# on the benchmark dashboard
|
||||
# pass the tensor parallel size, the compilation mode, and the optimization
|
||||
# level to the client so that they can be used on the benchmark dashboard
|
||||
client_command="vllm bench serve \
|
||||
--save-result \
|
||||
--result-dir $RESULTS_FOLDER \
|
||||
--result-filename ${new_test_name}.json \
|
||||
--request-rate $qps \
|
||||
--max-concurrency $max_concurrency \
|
||||
--metadata "tensor_parallel_size=$tp" \
|
||||
--metadata tensor_parallel_size=$tp compilation_config.mode=$compilation_config_mode optimization_level=$optimization_level \
|
||||
$client_args $client_remote_args "
|
||||
|
||||
echo "Running test case $test_name with qps $qps"
|
||||
|
||||
@@ -8,6 +8,32 @@ import os
|
||||
from typing import Any
|
||||
|
||||
|
||||
def extract_field(
    args: argparse.Namespace, extra_info: dict[str, Any], field_name: str
) -> Any:
    """
    Look up a (possibly dotted) field, preferring extra_info over args.

    Args:
        args: Parsed command-line arguments to fall back to.
        extra_info: Extra key/value pairs; a key equal to the full
            ``field_name`` (dots included) takes precedence over ``args``.
        field_name: Field to look up, e.g. ``"compilation_config.mode"``.
            Each dot-separated part is resolved in turn starting from ``args``.

    Returns:
        The value found, returned as-is without any type conversion, or an
        empty string when any part of the path cannot be resolved.
    """
    # Local import so this block does not depend on the module's import list.
    from collections.abc import Mapping

    if field_name in extra_info:
        return extra_info[field_name]

    v: Any = args
    # For example, args.compilation_config.mode
    for nested_field in field_name.split("."):
        if isinstance(v, Mapping) and nested_field in v:
            # A nested config may be held as a plain dict rather than an
            # object, in which case the field is a key, not an attribute.
            v = v[nested_field]
        elif hasattr(v, nested_field):
            v = getattr(v, nested_field)
        else:
            return ""
    return v
|
||||
|
||||
|
||||
def use_compile(args: argparse.Namespace, extra_info: dict[str, Any]) -> bool:
    """
    Check if the benchmark is run with torch.compile

    Returns False when the "compilation_config.mode" field resolves to 0
    (as the string "0" or any value whose str() is "0"), or when "eager"
    appears in args.output_json or args.result_filename. Returns True
    otherwise.
    """
    mode = extract_field(args, extra_info, "compilation_config.mode")
    # The attribute can exist but be unset (None); `"eager" in None` would
    # raise TypeError, so coalesce to an empty string first.
    output_json = getattr(args, "output_json", "") or ""
    result_filename = getattr(args, "result_filename", "") or ""
    return not (
        # The mode may be the string "0" or a non-string such as the
        # integer 0, so compare on its string form.
        str(mode) == "0"
        or "eager" in output_json
        or "eager" in result_filename
    )
|
||||
|
||||
|
||||
def convert_to_pytorch_benchmark_format(
|
||||
args: argparse.Namespace, metrics: dict[str, list], extra_info: dict[str, Any]
|
||||
) -> list:
|
||||
@@ -26,6 +52,14 @@ def convert_to_pytorch_benchmark_format(
|
||||
"name": "vLLM benchmark",
|
||||
"extra_info": {
|
||||
"args": vars(args),
|
||||
"compilation_config.mode": extract_field(
|
||||
args, extra_info, "compilation_config.mode"
|
||||
),
|
||||
"optimization_level": extract_field(
|
||||
args, extra_info, "optimization_level"
|
||||
),
|
||||
# A boolean field used by vLLM benchmark HUD dashboard
|
||||
"use_compile": use_compile(args, extra_info),
|
||||
},
|
||||
},
|
||||
"model": {
|
||||
|
||||
Reference in New Issue
Block a user