[Frontend][Core] Re-add shutdown timeout - allowing in-flight requests to finish (#36666)
Signed-off-by: Mark McLoughlin <markmc@redhat.com>
Signed-off-by: Nick Hill <nickhill123@gmail.com>
Co-authored-by: Claude Sonnet 4.5 <noreply@anthropic.com>
Co-authored-by: Nick Hill <nickhill123@gmail.com>
This commit is contained in:
@@ -3,8 +3,9 @@
|
||||
|
||||
import contextlib
|
||||
import os
|
||||
import threading
|
||||
import weakref
|
||||
from collections.abc import Iterator
|
||||
from collections.abc import Callable, Iterator
|
||||
from dataclasses import dataclass
|
||||
from enum import Enum, auto
|
||||
from multiprocessing import Process, connection
|
||||
@@ -146,11 +147,12 @@ class CoreEngineProcManager:
|
||||
finally:
|
||||
# Kill other procs if not all are running.
|
||||
if self.finished_procs():
|
||||
self.close()
|
||||
self.shutdown()
|
||||
|
||||
def close(self):
    """Shut down all procs by invoking the stored finalizer."""
    run_finalizer = self._finalizer
    run_finalizer()
|
||||
def shutdown(self, timeout: float | None = None) -> None:
    """Shut down the engine core processes, passing ``timeout`` through.

    The actual teardown is delegated to the module-level ``shutdown``
    helper, which receives ``self.processes`` and the given ``timeout``.
    """
    # detach() hands back a non-None value only while the finalizer is still
    # pending (presumably a weakref.finalize -- confirm against __init__), so
    # a None result means there is nothing left to do and we bail out early.
    if self._finalizer.detach() is None:
        return
    shutdown(self.processes, timeout=timeout)
|
||||
|
||||
def join_first(self):
|
||||
"""Wait for any process to exit."""
|
||||
@@ -168,6 +170,33 @@ class CoreEngineProcManager:
|
||||
}
|
||||
|
||||
|
||||
class SignalCallback:
    """Safely trigger a callback from signal handler context via a dedicated thread.

    A daemon thread is started on construction and blocks until it is woken
    by either ``trigger()`` or ``stop()``. On wake-up it invokes ``callback``
    exactly once unless ``stop()`` has been called in the meantime; the
    thread then exits, so later ``trigger()`` calls have no effect.

    The wake-up is delivered through a ``queue.SimpleQueue`` rather than a
    ``threading.Event``: ``Event.set()`` acquires a non-reentrant lock, so a
    signal handler that interrupts another in-progress ``set()`` on the same
    thread (a second signal arriving mid-handler, or a signal landing while
    the main thread is inside ``stop()``) would block forever.
    ``SimpleQueue.put()`` is documented as reentrant in CPython, which makes
    ``trigger()`` safe to call from a signal handler.

    Args:
        callback: Zero-argument callable run on the helper thread.
    """

    def __init__(self, callback: Callable[[], None]):
        # Function-scope import so this class does not depend on the
        # module's import block being extended.
        import queue

        self._callback = callback
        # Carries wake-up tokens to the helper thread; the token value itself
        # is not inspected, only its arrival matters.
        self._wakeups = queue.SimpleQueue()
        self._stopped = False
        self._thread = threading.Thread(
            target=self._run,
            daemon=True,
            name="signal-callback",
        )
        self._thread.start()

    def _run(self) -> None:
        """Block until woken, then run the callback unless stopped."""
        self._wakeups.get()
        # The flag is checked after waking, so a stop() that lands before the
        # thread gets scheduled still suppresses the callback.
        if not self._stopped:
            self._callback()

    def trigger(self) -> None:
        """Wake the helper thread so it runs the callback (signal-safe)."""
        self._wakeups.put(True)

    def stop(self) -> None:
        """Wake the helper thread without running the callback."""
        # Set the flag before waking the thread so _run() observes it.
        self._stopped = True
        self._wakeups.put(False)
|
||||
|
||||
|
||||
@contextlib.contextmanager
|
||||
def set_device_control_env_var(
|
||||
vllm_config: VllmConfig, local_dp_rank: int
|
||||
@@ -763,7 +792,7 @@ class CoreEngineActorManager:
|
||||
def get_run_refs(self):
    """Return the ``run_refs`` attribute held by this manager."""
    refs = self.run_refs
    return refs
|
||||
|
||||
def close(self):
|
||||
def shutdown(self, timeout: float | None = None) -> None:
|
||||
import ray
|
||||
|
||||
for actor in self.local_engine_actors + self.remote_engine_actors:
|
||||
|
||||
Reference in New Issue
Block a user