diff --git a/benchmarks/benchmark_serving_structured_output.py b/benchmarks/benchmark_serving_structured_output.py index a4e1b163d..33aca8318 100644 --- a/benchmarks/benchmark_serving_structured_output.py +++ b/benchmarks/benchmark_serving_structured_output.py @@ -574,7 +574,7 @@ async def benchmark( ) print( "{:<40} {:<10.2f}".format( - "Total Token throughput (tok/s):", metrics.total_token_throughput + "Total token throughput (tok/s):", metrics.total_token_throughput ) ) diff --git a/docs/benchmarking/cli.md b/docs/benchmarking/cli.md index 1ce6b6117..dd5a12e40 100644 --- a/docs/benchmarking/cli.md +++ b/docs/benchmarking/cli.md @@ -84,7 +84,7 @@ Total input tokens: 1369 Total generated tokens: 2212 Request throughput (req/s): 1.73 Output token throughput (tok/s): 382.89 -Total Token throughput (tok/s): 619.85 +Total token throughput (tok/s): 619.85 ---------------Time to First Token---------------- Mean TTFT (ms): 71.54 Median TTFT (ms): 73.88 diff --git a/vllm/benchmarks/serve.py b/vllm/benchmarks/serve.py index 2e2054a8a..254e4d35e 100644 --- a/vllm/benchmarks/serve.py +++ b/vllm/benchmarks/serve.py @@ -788,7 +788,7 @@ async def benchmark( ) print( "{:<40} {:<10.2f}".format( - "Total Token throughput (tok/s):", metrics.total_token_throughput + "Total token throughput (tok/s):", metrics.total_token_throughput ) )