[Frontend] Add vllm bench sweep to CLI (#27639)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk> Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> Co-authored-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
2025-10-29 20:59:48 +08:00
parent 9a0d2f0d92
commit ecca3fee76
19 changed files with 340 additions and 168 deletions
--- a/docs/mkdocs/hooks/generate_argparse.py
+++ b/docs/mkdocs/hooks/generate_argparse.py
@@ -56,15 +56,20 @@ def auto_mock(module, attr, max_mocks=50):
    )


-latency = auto_mock("vllm.benchmarks", "latency")
-serve = auto_mock("vllm.benchmarks", "serve")
-throughput = auto_mock("vllm.benchmarks", "throughput")
+bench_latency = auto_mock("vllm.benchmarks", "latency")
+bench_serve = auto_mock("vllm.benchmarks", "serve")
+bench_sweep_plot = auto_mock("vllm.benchmarks.sweep.plot", "SweepPlotArgs")
+bench_sweep_serve = auto_mock("vllm.benchmarks.sweep.serve", "SweepServeArgs")
+bench_sweep_serve_sla = auto_mock(
+    "vllm.benchmarks.sweep.serve_sla", "SweepServeSLAArgs"
+)
+bench_throughput = auto_mock("vllm.benchmarks", "throughput")
 AsyncEngineArgs = auto_mock("vllm.engine.arg_utils", "AsyncEngineArgs")
 EngineArgs = auto_mock("vllm.engine.arg_utils", "EngineArgs")
 ChatCommand = auto_mock("vllm.entrypoints.cli.openai", "ChatCommand")
 CompleteCommand = auto_mock("vllm.entrypoints.cli.openai", "CompleteCommand")
-cli_args = auto_mock("vllm.entrypoints.openai", "cli_args")
-run_batch = auto_mock("vllm.entrypoints.openai", "run_batch")
+openai_cli_args = auto_mock("vllm.entrypoints.openai", "cli_args")
+openai_run_batch = auto_mock("vllm.entrypoints.openai", "run_batch")
 FlexibleArgumentParser = auto_mock(
    "vllm.utils.argparse_utils", "FlexibleArgumentParser"
 )
@@ -114,6 +119,9 @@ class MarkdownFormatter(HelpFormatter):
                self._markdown_output.append(f"{action.help}\n\n")

            if (default := action.default) != SUPPRESS:
+                # Make empty string defaults visible
+                if default == "":
+                    default = '""'
                self._markdown_output.append(f"Default: `{default}`\n\n")

    def format_help(self):
@@ -150,17 +158,23 @@ def on_startup(command: Literal["build", "gh-deploy", "serve"], dirty: bool):

    # Create parsers to document
    parsers = {
+        # Engine args
        "engine_args": create_parser(EngineArgs.add_cli_args),
        "async_engine_args": create_parser(
            AsyncEngineArgs.add_cli_args, async_args_only=True
        ),
-        "serve": create_parser(cli_args.make_arg_parser),
+        # CLI
+        "serve": create_parser(openai_cli_args.make_arg_parser),
        "chat": create_parser(ChatCommand.add_cli_args),
        "complete": create_parser(CompleteCommand.add_cli_args),
-        "bench_latency": create_parser(latency.add_cli_args),
-        "bench_throughput": create_parser(throughput.add_cli_args),
-        "bench_serve": create_parser(serve.add_cli_args),
-        "run-batch": create_parser(run_batch.make_arg_parser),
+        "run-batch": create_parser(openai_run_batch.make_arg_parser),
+        # Benchmark CLI
+        "bench_latency": create_parser(bench_latency.add_cli_args),
+        "bench_serve": create_parser(bench_serve.add_cli_args),
+        "bench_sweep_plot": create_parser(bench_sweep_plot.add_cli_args),
+        "bench_sweep_serve": create_parser(bench_sweep_serve.add_cli_args),
+        "bench_sweep_serve_sla": create_parser(bench_sweep_serve_sla.add_cli_args),
+        "bench_throughput": create_parser(bench_throughput.add_cli_args),
    }

    # Generate documentation for each parser