From 243e78c20fd74a68f86b6523c1f607eb3cc14ab2 Mon Sep 17 00:00:00 2001 From: Isotr0py Date: Fri, 23 Jan 2026 20:11:18 +0800 Subject: [PATCH] [Benchmark][Bugfix] Fix race condition when starting server for sweep benchmark (#32927) Signed-off-by: Isotr0py --- vllm/benchmarks/sweep/serve.py | 13 +++++++++++++ vllm/benchmarks/sweep/server.py | 24 ++++++++++++++++++++++++ 2 files changed, 37 insertions(+) diff --git a/vllm/benchmarks/sweep/serve.py b/vllm/benchmarks/sweep/serve.py index 6626707cf..8b129e49a 100644 --- a/vllm/benchmarks/sweep/serve.py +++ b/vllm/benchmarks/sweep/serve.py @@ -29,6 +29,7 @@ def run_server( show_stdout: bool, serve_overrides: ParameterSweepItem, dry_run: bool, + server_ready_timeout: int = 300, ): server_cmd = serve_overrides.apply_to_cmd(serve_cmd) @@ -42,6 +43,7 @@ def run_server( return with ServerProcess(server_cmd, after_bench_cmd, show_stdout=show_stdout) as server: + server.wait_until_ready(timeout=server_ready_timeout) yield server print("[END SERVER]") @@ -212,6 +214,7 @@ def run_combs( num_runs: int, dry_run: bool, links: list[tuple[str, str]], + server_ready_timeout: int = 300, ): all_data = list[dict[str, object]]() for serve_comb in serve_params: @@ -222,6 +225,7 @@ def run_combs( show_stdout=show_stdout, serve_overrides=serve_comb, dry_run=dry_run, + server_ready_timeout=server_ready_timeout, ) if _comb_needs_server(serve_comb, bench_params, output_dir) else contextlib.nullcontext() @@ -272,6 +276,7 @@ class SweepServeArgs: dry_run: bool resume: str | None link_vars: list[tuple[str, str]] | None + server_ready_timeout: int parser_name: ClassVar[str] = "serve" parser_help: ClassVar[str] = "Run vLLM server benchmark under multiple settings." @@ -312,6 +317,7 @@ class SweepServeArgs: dry_run=args.dry_run, resume=args.resume, link_vars=link_vars, + server_ready_timeout=args.server_ready_timeout, ) @classmethod @@ -341,6 +347,12 @@ class SweepServeArgs: help="If set, logs the standard output of subcommands. 
" "Useful for debugging but can be quite spammy.", ) + parser.add_argument( + "--server-ready-timeout", + type=int, + default=300, + help="Timeout in seconds to wait for the server to become ready.", + ) parser.add_argument( "--serve-params", type=str, @@ -431,6 +443,7 @@ def run_main(args: SweepServeArgs): num_runs=args.num_runs, dry_run=args.dry_run, links=args.link_vars, + server_ready_timeout=args.server_ready_timeout, ) except BaseException as exc: raise RuntimeError( diff --git a/vllm/benchmarks/sweep/server.py b/vllm/benchmarks/sweep/server.py index f17578726..6c6c0abcb 100644 --- a/vllm/benchmarks/sweep/server.py +++ b/vllm/benchmarks/sweep/server.py @@ -4,6 +4,7 @@ import contextlib import os import signal import subprocess +import time from types import TracebackType import requests @@ -88,6 +89,29 @@ class ServerProcess: return f"http://{host}:{port}" + def is_server_ready(self) -> bool: + server_address = self._get_vllm_server_address() + try: + response = requests.get(f"{server_address}/health") + return response.status_code == 200 + except requests.RequestException: + return False + + def wait_until_ready(self, timeout: int) -> None: + start_time = time.monotonic() + while not self.is_server_ready(): + # Check if server process has crashed + if self._server_process.poll() is not None: + returncode = self._server_process.returncode + raise RuntimeError( + f"Server process crashed with return code {returncode}" + ) + if time.monotonic() - start_time > timeout: + raise TimeoutError( + f"Server failed to become ready within {timeout} seconds." + ) + time.sleep(1) + def reset_caches(self) -> None: server_cmd = self.server_cmd