[Frontend][Core] Re-add shutdown timeout - allowing in-flight requests to finish (#36666)

Signed-off-by: Mark McLoughlin <markmc@redhat.com>
Signed-off-by: Nick Hill <nickhill123@gmail.com>
Co-authored-by: Claude Sonnet 4.5 <noreply@anthropic.com>
Co-authored-by: Nick Hill <nickhill123@gmail.com>
This commit is contained in:
Mark McLoughlin
2026-03-13 19:10:06 +00:00
committed by GitHub
parent 5a3f1eb62f
commit 7afe0faab1
14 changed files with 762 additions and 96 deletions

View File

@@ -3,8 +3,9 @@
import contextlib
import os
import threading
import weakref
from collections.abc import Iterator
from collections.abc import Callable, Iterator
from dataclasses import dataclass
from enum import Enum, auto
from multiprocessing import Process, connection
@@ -146,11 +147,12 @@ class CoreEngineProcManager:
finally:
# Kill other procs if not all are running.
if self.finished_procs():
self.close()
self.shutdown()
def close(self):
    """Shut down all procs by invoking the stored finalizer."""
    # NOTE(review): `_finalizer` is assigned outside this view; presumably a
    # `weakref.finalize` created in `__init__` - confirm.
    run_finalizer = self._finalizer
    run_finalizer()
def shutdown(self, timeout: float | None = None) -> None:
"""Shutdown engine core processes with configurable timeout."""
if self._finalizer.detach() is not None:
shutdown(self.processes, timeout=timeout)
def join_first(self):
"""Wait for any process to exit."""
@@ -168,6 +170,33 @@ class CoreEngineProcManager:
}
class SignalCallback:
    """Run a callback on a dedicated thread when asked to from a signal handler.

    A daemon thread named ``signal-callback`` is started at construction time
    and blocks on an event. ``trigger()`` sets the event so the callback runs
    on that thread instead of in the caller's context. ``stop()`` sets the
    stop flag and wakes the thread so it exits without running the callback.
    """

    def __init__(self, callback: Callable[[], None]):
        self._callback = callback
        self._event = threading.Event()
        self._stopped = False
        worker = threading.Thread(
            name="signal-callback",
            target=self._run,
            daemon=True,
        )
        # Publish the thread on the instance before it starts running.
        self._thread = worker
        worker.start()

    def _run(self):
        """Thread body: wait for the event, then run the callback unless stopped."""
        self._event.wait()
        if self._stopped:
            return
        self._callback()

    def trigger(self):
        """Wake the worker thread so that it invokes the callback."""
        self._event.set()

    def stop(self):
        """Mark the instance as stopped and wake the worker thread."""
        # The flag must be set before the event so the worker sees it on wake.
        self._stopped = True
        self._event.set()
@contextlib.contextmanager
def set_device_control_env_var(
vllm_config: VllmConfig, local_dp_rank: int
@@ -763,7 +792,7 @@ class CoreEngineActorManager:
def get_run_refs(self):
    """Return the object stored in ``self.run_refs`` (no copy is made)."""
    refs = self.run_refs
    return refs
def close(self):
def shutdown(self, timeout: float | None = None) -> None:
import ray
for actor in self.local_engine_actors + self.remote_engine_actors: