diff --git a/.buildkite/performance-benchmarks/scripts/run-performance-benchmarks.sh b/.buildkite/performance-benchmarks/scripts/run-performance-benchmarks.sh
index 3f1ea67a4..d62c01bc7 100755
--- a/.buildkite/performance-benchmarks/scripts/run-performance-benchmarks.sh
+++ b/.buildkite/performance-benchmarks/scripts/run-performance-benchmarks.sh
@@ -393,6 +393,11 @@ run_serving_tests() {
       fi
     fi
 
+    # save the compilation mode and optimization level on the serving results
+    # whenever they are set
+    compilation_config_mode=$(echo "$server_params" | jq -r '."compilation_config.mode" // empty')
+    optimization_level=$(echo "$server_params" | jq -r '.optimization_level // empty')
+
     # iterate over different QPS
     for qps in $qps_list; do
       # remove the surrounding single quote from qps
@@ -406,15 +411,15 @@ run_serving_tests() {
       for max_concurrency in $max_concurrency_list; do
         new_test_name=$test_name"_qps_"$qps"_concurrency_"$max_concurrency
         echo " new test name $new_test_name"
-        # pass the tensor parallel size to the client so that it can be displayed
-        # on the benchmark dashboard
+        # pass the tensor parallel size, the compilation mode, and the optimization
+        # level to the client so that they can be used on the benchmark dashboard
         client_command="vllm bench serve \
           --save-result \
           --result-dir $RESULTS_FOLDER \
           --result-filename ${new_test_name}.json \
           --request-rate $qps \
           --max-concurrency $max_concurrency \
-          --metadata "tensor_parallel_size=$tp" \
+          --metadata tensor_parallel_size=$tp compilation_config.mode=$compilation_config_mode optimization_level=$optimization_level \
           $client_args $client_remote_args "
 
         echo "Running test case $test_name with qps $qps"
diff --git a/vllm/benchmarks/lib/utils.py b/vllm/benchmarks/lib/utils.py
index 32e9db499..6d8bfd021 100644
--- a/vllm/benchmarks/lib/utils.py
+++ b/vllm/benchmarks/lib/utils.py
@@ -8,6 +8,49 @@ import os
 from typing import Any
 
 
+def extract_field(
+    args: argparse.Namespace, extra_info: dict[str, Any], field_name: str
+) -> Any:
+    """
+    Look up a possibly dotted field, preferring extra_info over args.
+
+    If field_name is a key of extra_info, that value is returned. Otherwise
+    the dotted path is resolved attribute by attribute starting from args
+    (for example, args.compilation_config.mode), and "" is returned as soon
+    as one attribute on the path is missing. The value is returned as found,
+    so it is not necessarily a string.
+    """
+    if field_name in extra_info:
+        return extra_info[field_name]
+
+    v = args
+    # For example, args.compilation_config.mode
+    for nested_field in field_name.split("."):
+        if not hasattr(v, nested_field):
+            return ""
+        v = getattr(v, nested_field)
+    return v
+
+
+def use_compile(args: argparse.Namespace, extra_info: dict[str, Any]) -> bool:
+    """
+    Check if the benchmark is run with torch.compile
+
+    The run is treated as eager (not compiled) when the compilation mode is 0
+    or when "eager" appears in the output_json or result_filename argument.
+    """
+    # Accept the integer 0 as well as the string "0": a mode read straight
+    # from args may be numeric rather than text.
+    mode = extract_field(args, extra_info, "compilation_config.mode")
+    # An attribute that exists but is None would make `"eager" in None` raise
+    # TypeError, so treat both a missing and a None value as an empty string.
+    output_json = getattr(args, "output_json", None) or ""
+    result_filename = getattr(args, "result_filename", None) or ""
+    return not (
+        mode in (0, "0") or "eager" in output_json or "eager" in result_filename
+    )
+
+
 def convert_to_pytorch_benchmark_format(
     args: argparse.Namespace, metrics: dict[str, list], extra_info: dict[str, Any]
 ) -> list:
@@ -26,6 +69,14 @@ def convert_to_pytorch_benchmark_format(
                 "name": "vLLM benchmark",
                 "extra_info": {
                     "args": vars(args),
+                    "compilation_config.mode": extract_field(
+                        args, extra_info, "compilation_config.mode"
+                    ),
+                    "optimization_level": extract_field(
+                        args, extra_info, "optimization_level"
+                    ),
+                    # A boolean field used by vLLM benchmark HUD dashboard
+                    "use_compile": use_compile(args, extra_info),
                 },
             },
             "model": {