Indicate compile mode in the benchmark results (#32990)

Signed-off-by: Huy Do <huydhn@gmail.com>
This commit is contained in:
Huy Do
2026-01-30 12:34:36 -08:00
committed by GitHub
parent c3a9752b0c
commit 9ca66ecc10
2 changed files with 42 additions and 3 deletions

View File

@@ -393,6 +393,11 @@ run_serving_tests() {
fi
fi
# save the compilation mode and optimization level on the serving results
# whenever they are set
compilation_config_mode=$(echo "$server_params" | jq -r '."compilation_config.mode" // empty')
optimization_level=$(echo "$server_params" | jq -r '.optimization_level // empty')
# iterate over different QPS
for qps in $qps_list; do
# remove the surrounding single quote from qps
@@ -406,15 +411,15 @@ run_serving_tests() {
for max_concurrency in $max_concurrency_list; do
new_test_name=$test_name"_qps_"$qps"_concurrency_"$max_concurrency
echo " new test name $new_test_name"
# pass the tensor parallel size to the client so that it can be displayed
# on the benchmark dashboard
# pass the tensor parallel size, the compilation mode, and the optimization
# level to the client so that they can be used on the benchmark dashboard
client_command="vllm bench serve \
--save-result \
--result-dir $RESULTS_FOLDER \
--result-filename ${new_test_name}.json \
--request-rate $qps \
--max-concurrency $max_concurrency \
--metadata "tensor_parallel_size=$tp" \
--metadata tensor_parallel_size=$tp compilation_config.mode=$compilation_config_mode optimization_level=$optimization_level \
$client_args $client_remote_args "
echo "Running test case $test_name with qps $qps"

View File

@@ -8,6 +8,32 @@ import os
from typing import Any
def extract_field(
    args: argparse.Namespace, extra_info: dict[str, Any], field_name: str
) -> str:
    """
    Look up a benchmark field, preferring extra_info over the parsed args.

    If field_name is a key of extra_info, the stored value is returned as is.
    Otherwise field_name is treated as a dotted attribute path that is walked
    starting from args (for example, "compilation_config.mode" resolves to
    args.compilation_config.mode). An empty string is returned as soon as any
    attribute along the path is missing.
    """
    if field_name in extra_info:
        return extra_info[field_name]

    current = args
    for part in field_name.split("."):
        try:
            current = getattr(current, part)
        except AttributeError:
            # Any missing link in the path means the field is not set
            return ""
    return current
def use_compile(args: argparse.Namespace, extra_info: dict[str, Any]) -> bool:
    """
    Check if the benchmark is run with torch.compile

    Returns False when the "compilation_config.mode" field (looked up through
    extract_field) is 0, or when the word "eager" appears in args.output_json
    or args.result_filename. Returns True in every other case, including when
    none of these values are set.
    """
    # The mode can be the string "0" or the number 0 depending on where
    # extract_field found it, so compare the textual form.
    mode = extract_field(args, extra_info, "compilation_config.mode")
    if str(mode) == "0":
        return False

    for attr_name in ("output_json", "result_filename"):
        # The getattr default only applies when the attribute is missing; an
        # attribute that exists but is None (e.g. an option that was not
        # passed) must also be treated as an empty name instead of raising
        # TypeError on the "in" check.
        file_name = getattr(args, attr_name, None) or ""
        if "eager" in str(file_name):
            return False

    return True
def convert_to_pytorch_benchmark_format(
args: argparse.Namespace, metrics: dict[str, list], extra_info: dict[str, Any]
) -> list:
@@ -26,6 +52,14 @@ def convert_to_pytorch_benchmark_format(
"name": "vLLM benchmark",
"extra_info": {
"args": vars(args),
"compilation_config.mode": extract_field(
args, extra_info, "compilation_config.mode"
),
"optimization_level": extract_field(
args, extra_info, "optimization_level"
),
# A boolean field used by vLLM benchmark HUD dashboard
"use_compile": use_compile(args, extra_info),
},
},
"model": {