[Frontend][Core] Add shutdown timeout - allowing in-flight requests to finish (#34730)

Signed-off-by: Mark McLoughlin <markmc@redhat.com>
Co-authored-by: Claude Sonnet 4.5 <noreply@anthropic.com>
This commit is contained in:
Mark McLoughlin
2026-03-06 06:04:31 +00:00
committed by GitHub
parent 57c84ff129
commit 27066d1b2b
15 changed files with 762 additions and 90 deletions

View File

@@ -3,6 +3,7 @@
import contextlib
import os
import threading
import weakref
from collections.abc import Callable, Iterator
from dataclasses import dataclass
@@ -151,11 +152,12 @@ class CoreEngineProcManager:
finally:
# Kill other procs if not all are running.
if self.finished_procs():
self.close()
self.shutdown()
def close(self):
    """Shut down all procs by invoking the stored finalizer."""
    run_finalizer = self._finalizer
    run_finalizer()
def shutdown(self, timeout: float | None = None) -> None:
    """Shut down the engine core processes, forwarding ``timeout``.

    The stored finalizer is detached first; the processes are only shut down
    when ``detach()`` returns something other than ``None``, so a second call
    does nothing.

    Args:
        timeout: Passed through unchanged to the module-level ``shutdown``
            helper; its exact meaning is defined there.
    """
    # NOTE(review): ``_finalizer`` is presumably a ``weakref.finalize``, whose
    # ``detach()`` returns None once it is no longer alive -- confirm.
    detached = self._finalizer.detach()
    if detached is None:
        return
    shutdown(self.processes, timeout=timeout)
def join_first(self):
"""Wait for any process to exit."""
@@ -173,6 +175,33 @@ class CoreEngineProcManager:
}
class SignalCallback:
    """Safely trigger a callback from signal handler context via a dedicated thread.

    A daemon thread named ``signal-callback`` is started on construction and
    blocks on an internal event. ``trigger()`` wakes it so that ``callback``
    runs on that thread rather than inside the signal handler; ``stop()``
    wakes it without running the callback. The thread waits only once, so the
    callback runs at most one time.

    Args:
        callback: Zero-argument callable to run on the helper thread.
    """

    def __init__(self, callback: Callable[[], None]):
        self._callback = callback
        self._event = threading.Event()
        self._stopped = False
        # Flipped before the first ``Event.set()`` so that every later wake-up
        # returns without touching the event (see ``_wake``).
        self._signalled = False
        self._thread = threading.Thread(
            target=self._run,
            daemon=True,
            name="signal-callback",
        )
        self._thread.start()

    def _run(self):
        """Thread body: wait for a wake-up, then run the callback unless stopped."""
        self._event.wait()
        if not self._stopped:
            self._callback()

    def _wake(self):
        """Set the event at most once.

        Synchronization primitives can deadlock when a signal handler
        re-enters them on the thread that is already inside them (for example
        a second signal arriving while the first handler is still in
        ``Event.set()``, or a signal arriving during ``stop()``). Checking a
        plain flag first means repeat wake-ups never reach the event.
        """
        if self._signalled:
            return
        self._signalled = True
        self._event.set()

    def trigger(self):
        """Wake the helper thread so it runs the callback; repeat calls are no-ops."""
        self._wake()

    def stop(self):
        """Wake the helper thread without running the callback, if it has not run."""
        # Must be set before waking the thread so ``_run`` observes it.
        self._stopped = True
        self._wake()
@contextlib.contextmanager
def set_device_control_env_var(
vllm_config: VllmConfig, local_dp_rank: int
@@ -768,7 +797,7 @@ class CoreEngineActorManager:
def get_run_refs(self):
    """Return the ``run_refs`` attribute stored on this manager."""
    refs = self.run_refs
    return refs
def close(self):
def shutdown(self, timeout: float | None = None) -> None:
import ray
for actor in self.local_engine_actors + self.remote_engine_actors: