From 243e78c20fd74a68f86b6523c1f607eb3cc14ab2 Mon Sep 17 00:00:00 2001 From: Isotr0py Date: Fri, 23 Jan 2026 20:11:18 +0800 Subject: [PATCH] [Benchmark][Bugfix] Fix race condition when starting server for sweep benchmark (#32927) Signed-off-by: Isotr0py --- vllm/benchmarks/sweep/serve.py | 13 +++++++++++++ vllm/benchmarks/sweep/server.py | 24 ++++++++++++++++++++++++ 2 files changed, 37 insertions(+) diff --git a/vllm/benchmarks/sweep/serve.py b/vllm/benchmarks/sweep/serve.py index 6626707cf..8b129e49a 100644 --- a/vllm/benchmarks/sweep/serve.py +++ b/vllm/benchmarks/sweep/serve.py @@ -29,6 +29,7 @@ def run_server( show_stdout: bool, serve_overrides: ParameterSweepItem, dry_run: bool, + server_ready_timeout: int = 300, ): server_cmd = serve_overrides.apply_to_cmd(serve_cmd) @@ -42,6 +43,7 @@ def run_server( return with ServerProcess(server_cmd, after_bench_cmd, show_stdout=show_stdout) as server: + server.wait_until_ready(timeout=server_ready_timeout) yield server print("[END SERVER]") @@ -212,6 +214,7 @@ def run_combs( num_runs: int, dry_run: bool, links: list[tuple[str, str]], + server_ready_timeout: int = 300, ): all_data = list[dict[str, object]]() for serve_comb in serve_params: @@ -222,6 +225,7 @@ def run_combs( show_stdout=show_stdout, serve_overrides=serve_comb, dry_run=dry_run, + server_ready_timeout=server_ready_timeout, ) if _comb_needs_server(serve_comb, bench_params, output_dir) else contextlib.nullcontext() @@ -272,6 +276,7 @@ class SweepServeArgs: dry_run: bool resume: str | None link_vars: list[tuple[str, str]] | None + server_ready_timeout: int parser_name: ClassVar[str] = "serve" parser_help: ClassVar[str] = "Run vLLM server benchmark under multiple settings." @@ -312,6 +317,7 @@ class SweepServeArgs: dry_run=args.dry_run, resume=args.resume, link_vars=link_vars, + server_ready_timeout=args.server_ready_timeout, ) @classmethod @@ -341,6 +347,12 @@ class SweepServeArgs: help="If set, logs the standard output of subcommands. 
" "Useful for debugging but can be quite spammy.", ) + parser.add_argument( + "--server-ready-timeout", + type=int, + default=300, + help="Timeout in seconds to wait for the server to become ready.", + ) parser.add_argument( "--serve-params", type=str, @@ -431,6 +443,7 @@ def run_main(args: SweepServeArgs): num_runs=args.num_runs, dry_run=args.dry_run, links=args.link_vars, + server_ready_timeout=args.server_ready_timeout, ) except BaseException as exc: raise RuntimeError( diff --git a/vllm/benchmarks/sweep/server.py b/vllm/benchmarks/sweep/server.py index f17578726..6c6c0abcb 100644 --- a/vllm/benchmarks/sweep/server.py +++ b/vllm/benchmarks/sweep/server.py @@ -4,6 +4,7 @@ import contextlib import os import signal import subprocess +import time from types import TracebackType import requests @@ -88,6 +89,29 @@ class ServerProcess: return f"http://{host}:{port}" + def is_server_ready(self) -> bool: + server_address = self._get_vllm_server_address() + try: + response = requests.get(f"{server_address}/health") + return response.status_code == 200 + except requests.RequestException: + return False + + def wait_until_ready(self, timeout: int) -> None: + start_time = time.monotonic() + while not self.is_server_ready(): + # Check if server process has crashed + if self._server_process.poll() is not None: + returncode = self._server_process.returncode + raise RuntimeError( + f"Server process crashed with return code {returncode}" + ) + if time.monotonic() - start_time > timeout: + raise TimeoutError( + f"Server failed to become ready within {timeout} seconds." + ) + time.sleep(1) + def reset_caches(self) -> None: server_cmd = self.server_cmd