diff --git a/docs/benchmarking/sweeps.md b/docs/benchmarking/sweeps.md index 93b9f4d62..467d198ec 100644 --- a/docs/benchmarking/sweeps.md +++ b/docs/benchmarking/sweeps.md @@ -139,6 +139,63 @@ The algorithm for adjusting the SLA variable is as follows: For a given combination of `--serve-params` and `--bench-params`, we share the benchmark results across `--sla-params` to avoid rerunning benchmarks with the same SLA variable value. +### Startup + +`vllm bench sweep startup` runs `vllm bench startup` across parameter combinations to compare cold/warm startup time for different engine settings. + +Follow these steps to run the script: + +1. (Optional) Construct the base command to `vllm bench startup`, and pass it to `--startup-cmd` (default: `vllm bench startup`). +2. (Optional) Reuse a `--serve-params` JSON from `vllm bench sweep serve` to vary engine settings. Only parameters supported by `vllm bench startup` are applied. +3. (Optional) Create a `--startup-params` JSON to vary startup-specific options like iteration counts. +4. Determine where you want to save the results, and pass that to `--output-dir`. + +Example `--serve-params`: + +```json +[ + { + "_benchmark_name": "tp1", + "model": "Qwen/Qwen3-0.6B", + "tensor_parallel_size": 1, + "gpu_memory_utilization": 0.9 + }, + { + "_benchmark_name": "tp2", + "model": "Qwen/Qwen3-0.6B", + "tensor_parallel_size": 2, + "gpu_memory_utilization": 0.9 + } +] +``` + +Example `--startup-params`: + +```json +[ + { + "_benchmark_name": "qwen3-0.6", + "num_iters_cold": 2, + "num_iters_warmup": 1, + "num_iters_warm": 2 + } +] +``` + +Example command: + +```bash +vllm bench sweep startup \ + --startup-cmd 'vllm bench startup --model Qwen/Qwen3-0.6B' \ + --serve-params benchmarks/serve_hparams.json \ + --startup-params benchmarks/startup_hparams.json \ + -o benchmarks/results +``` + +!!! important + By default, unsupported parameters in `--serve-params` or `--startup-params` are ignored with a warning. + Use `--strict-params` to fail fast on unknown keys. + ## Visualization ### Basic diff --git a/vllm/benchmarks/sweep/cli.py b/vllm/benchmarks/sweep/cli.py index e74e0e2c1..a752000f9 100644 --- a/vllm/benchmarks/sweep/cli.py +++ b/vllm/benchmarks/sweep/cli.py @@ -12,10 +12,13 @@ from .serve import SweepServeArgs from .serve import main as serve_main from .serve_sla import SweepServeSLAArgs from .serve_sla import main as serve_sla_main +from .startup import SweepStartupArgs +from .startup import main as startup_main SUBCOMMANDS = ( (SweepServeArgs, serve_main), (SweepServeSLAArgs, serve_sla_main), + (SweepStartupArgs, startup_main), (SweepPlotArgs, plot_main), (SweepPlotParetoArgs, plot_pareto_main), ) diff --git a/vllm/benchmarks/sweep/startup.py b/vllm/benchmarks/sweep/startup.py new file mode 100644 index 000000000..8d779b364 --- /dev/null +++ b/vllm/benchmarks/sweep/startup.py @@ -0,0 +1,405 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright contributors to the vLLM project +import argparse +import json +import shlex +import subprocess +from dataclasses import dataclass +from datetime import datetime +from functools import lru_cache +from pathlib import Path +from typing import ClassVar + +from vllm.benchmarks.startup import add_cli_args as add_startup_cli_args +from vllm.utils.argparse_utils import FlexibleArgumentParser +from vllm.utils.import_utils import PlaceholderModule + +from .param_sweep import ParameterSweep, ParameterSweepItem +from .utils import sanitize_filename + +try: + import pandas as pd +except ImportError: + pd = PlaceholderModule("pandas") + + +@lru_cache(maxsize=1) +def _get_supported_startup_keys() -> set[str]: + parser = FlexibleArgumentParser(add_help=False) + add_startup_cli_args(parser) + + supported: set[str] = {"config"} + for action in parser._actions: + if action.dest and action.dest is not argparse.SUPPRESS: + supported.add(action.dest) + for option in action.option_strings: + if option.startswith("--"): + supported.add(option.lstrip("-").replace("-", "_")) + + return supported + + +def _is_supported_param(param_key: str, supported: set[str]) -> bool: + if param_key == "_benchmark_name": + return True + prefix = param_key.split(".", 1)[0] + normalized = prefix.replace("-", "_") + return normalized in supported + + +def _filter_params( + params: ParameterSweep, *, supported: set[str], strict: bool +) -> ParameterSweep: + filtered = [] + for item in params: + kept: dict[str, object] = {} + dropped: list[str] = [] + for key, value in item.items(): + if _is_supported_param(key, supported): + kept[key] = value + else: + dropped.append(key) + + if dropped: + label = item.get("_benchmark_name") or item.as_text() + message = ( + "Ignoring unsupported startup params" + f"{' for ' + str(label) if label else ''}: " + f"{', '.join(sorted(dropped))}" + ) + if strict: + raise ValueError(message) + print(message) + + filtered.append(ParameterSweepItem.from_record(kept)) + + return ParameterSweep(filtered) + + +def _update_run_data( + run_data: dict[str, object], + serve_overrides: ParameterSweepItem, + startup_overrides: ParameterSweepItem, + run_number: int, +) -> dict[str, object]: + run_data["run_number"] = run_number + run_data.update(serve_overrides) + run_data.update(startup_overrides) + return run_data + + +def _strip_arg(cmd: list[str], keys: tuple[str, ...]) -> list[str]: + stripped: list[str] = [] + skip_next = False + for arg in cmd: + if skip_next: + skip_next = False + continue + if arg in keys: + skip_next = True + continue + if any(arg.startswith(f"{key}=") for key in keys): + continue + stripped.append(arg) + return stripped + + +def _apply_output_json(cmd: list[str], output_path: Path) -> list[str]: + keys = ("--output-json", "--output_json") + cmd = _strip_arg(cmd, keys) + return [*cmd, keys[0], str(output_path)] + + +def _get_comb_base_path( + output_dir: Path, + serve_comb: ParameterSweepItem, + startup_comb: ParameterSweepItem, +) -> Path: + parts = list[str]() + if serve_comb: + parts.extend(("SERVE-", serve_comb.name)) + if startup_comb: + parts.extend(("STARTUP-", startup_comb.name)) + return output_dir / sanitize_filename("-".join(parts)) + + +def _get_comb_run_path(base_path: Path, run_number: int | None) -> Path: + if run_number is None: + return base_path / "summary.json" + return base_path / f"run={run_number}.json" + + +def run_benchmark( + startup_cmd: list[str], + *, + serve_overrides: ParameterSweepItem, + startup_overrides: ParameterSweepItem, + run_number: int, + output_path: Path, + show_stdout: bool, + dry_run: bool, +) -> dict[str, object] | None: + cmd = serve_overrides.apply_to_cmd(startup_cmd) + cmd = startup_overrides.apply_to_cmd(cmd) + cmd = _apply_output_json(cmd, output_path) + + print("[BEGIN BENCHMARK]") + print(f"Serve overrides: {serve_overrides}") + print(f"Startup overrides: {startup_overrides}") + print(f"Run Number: {run_number}") + print(f"Benchmark command: {cmd}") + print(f"Output file: {output_path}") + + if output_path.exists(): + print("Found existing results. Skipping.") + + with output_path.open("r", encoding="utf-8") as f: + run_data = json.load(f) + return _update_run_data( + run_data, serve_overrides, startup_overrides, run_number + ) + + if dry_run: + print("[END BENCHMARK]") + return None + + output_path.parent.mkdir(parents=True, exist_ok=True) + subprocess.run( + cmd, + stdout=None if show_stdout else subprocess.DEVNULL, + check=True, + ) + + with output_path.open("r", encoding="utf-8") as f: + run_data = json.load(f) + + run_data = _update_run_data( + run_data, serve_overrides, startup_overrides, run_number + ) + + with output_path.open("w", encoding="utf-8") as f: + json.dump(run_data, f, indent=4) + + print("[END BENCHMARK]") + return run_data + + +def run_comb( + startup_cmd: list[str], + *, + serve_comb: ParameterSweepItem, + startup_comb: ParameterSweepItem, + base_path: Path, + num_runs: int, + show_stdout: bool, + dry_run: bool, +) -> list[dict[str, object]] | None: + comb_data = list[dict[str, object]]() + for run_number in range(num_runs): + run_data = run_benchmark( + startup_cmd, + serve_overrides=serve_comb, + startup_overrides=startup_comb, + run_number=run_number, + output_path=_get_comb_run_path(base_path, run_number), + show_stdout=show_stdout, + dry_run=dry_run, + ) + if run_data is not None: + comb_data.append(run_data) + + if dry_run: + return None + + with _get_comb_run_path(base_path, run_number=None).open( + "w", encoding="utf-8" + ) as f: + json.dump(comb_data, f, indent=4) + + return comb_data + + +def run_combs( + startup_cmd: list[str], + *, + serve_params: ParameterSweep, + startup_params: ParameterSweep, + output_dir: Path, + num_runs: int, + show_stdout: bool, + dry_run: bool, +) -> "pd.DataFrame | None": + all_data = list[dict[str, object]]() + for serve_comb in serve_params: + for startup_comb in startup_params: + base_path = _get_comb_base_path(output_dir, serve_comb, startup_comb) + comb_data = run_comb( + startup_cmd, + serve_comb=serve_comb, + startup_comb=startup_comb, + base_path=base_path, + num_runs=num_runs, + show_stdout=show_stdout, + dry_run=dry_run, + ) + if comb_data is not None: + all_data.extend(comb_data) + + if dry_run: + return None + + combined_df = pd.DataFrame.from_records(all_data) + combined_df.to_csv(output_dir / "summary.csv") + return combined_df + + +@dataclass +class SweepStartupArgs: + startup_cmd: list[str] + serve_params: ParameterSweep + startup_params: ParameterSweep + output_dir: Path + num_runs: int + show_stdout: bool + dry_run: bool + resume: str | None + strict_params: bool + + parser_name: ClassVar[str] = "startup" + parser_help: ClassVar[str] = ( + "Benchmark vLLM startup time over parameter combinations." + ) + + @classmethod + def from_cli_args(cls, args: argparse.Namespace): + startup_cmd = shlex.split(args.startup_cmd) + + if args.serve_params: + serve_params = ParameterSweep.read_json(args.serve_params) + else: + serve_params = ParameterSweep.from_records([{}]) + + if args.startup_params: + startup_params = ParameterSweep.read_json(args.startup_params) + else: + startup_params = ParameterSweep.from_records([{}]) + + supported = _get_supported_startup_keys() + serve_params = _filter_params( + serve_params, supported=supported, strict=args.strict_params + ) + startup_params = _filter_params( + startup_params, supported=supported, strict=args.strict_params + ) + + if args.num_runs < 1: + raise ValueError("`num_runs` should be at least 1.") + + return cls( + startup_cmd=startup_cmd, + serve_params=serve_params, + startup_params=startup_params, + output_dir=Path(args.output_dir), + num_runs=args.num_runs, + show_stdout=args.show_stdout, + dry_run=args.dry_run, + resume=args.resume, + strict_params=args.strict_params, + ) + + @classmethod + def add_cli_args(cls, parser: argparse.ArgumentParser) -> argparse.ArgumentParser: + parser.add_argument( + "--startup-cmd", + type=str, + default="vllm bench startup", + help="The command used to run the startup benchmark.", + ) + parser.add_argument( + "--serve-params", + type=str, + default=None, + help="Path to JSON file containing parameter combinations " + "for the `vllm serve` command. Only parameters supported by " + "`vllm bench startup` will be applied.", + ) + parser.add_argument( + "--startup-params", + type=str, + default=None, + help="Path to JSON file containing parameter combinations " + "for the `vllm bench startup` command.", + ) + parser.add_argument( + "-o", + "--output-dir", + type=str, + default="results", + help="The directory to which results are written.", + ) + parser.add_argument( + "--num-runs", + type=int, + default=1, + help="Number of runs per parameter combination.", + ) + parser.add_argument( + "--show-stdout", + action="store_true", + help="If set, logs the standard output of subcommands.", + ) + parser.add_argument( + "--dry-run", + action="store_true", + help="If set, prints the commands to run, " + "then exits without executing them.", + ) + parser.add_argument( + "--resume", + type=str, + default=None, + help="Set this to the name of a directory under `output_dir` (which is a " + "timestamp) to resume a previous execution of this script, i.e., only run " + "parameter combinations for which there are still no output files.", + ) + parser.add_argument( + "--strict-params", + action="store_true", + help="If set, unknown parameters in sweep files raise an error " + "instead of being ignored.", + ) + return parser + + +def run_main(args: SweepStartupArgs): + timestamp = args.resume or datetime.now().strftime("%Y%m%d_%H%M%S") + output_dir = args.output_dir / timestamp + + if args.resume and not output_dir.exists(): + raise ValueError(f"Cannot resume from non-existent directory ({output_dir})") + + try: + return run_combs( + startup_cmd=args.startup_cmd, + serve_params=args.serve_params, + startup_params=args.startup_params, + output_dir=output_dir, + num_runs=args.num_runs, + show_stdout=args.show_stdout, + dry_run=args.dry_run, + ) + except BaseException as exc: + raise RuntimeError( + f"The script was terminated early. Use `--resume {timestamp}` " + f"to continue the script from its last checkpoint." + ) from exc + + +def main(args: argparse.Namespace): + run_main(SweepStartupArgs.from_cli_args(args)) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description=SweepStartupArgs.parser_help) + SweepStartupArgs.add_cli_args(parser) + main(parser.parse_args())