diff --git a/vllm/benchmarks/serve.py b/vllm/benchmarks/serve.py index 2e6013562..0a600aeb5 100644 --- a/vllm/benchmarks/serve.py +++ b/vllm/benchmarks/serve.py @@ -412,6 +412,12 @@ def calculate_metrics( # Find the time range across all successful requests successful_outputs = [output for output in outputs if output.success] failed_outputs = [output for output in outputs if not output.success] + + if len(failed_outputs) > 0: + print("Failed requests during benchmark run detected (capping to 10):") + for i, err in enumerate(failed_outputs[:10]): + print(f"Error {i}: {err.error}") + if successful_outputs: min_start_time = min(output.start_time for output in successful_outputs) max_end_time = max(