[Frontend][Core] Revert "Add shutdown timeout" (#34730 and #36270) (#36628)

Signed-off-by: Mark McLoughlin <markmc@redhat.com>
This commit is contained in:
Mark McLoughlin
2026-03-10 13:20:41 +00:00
committed by GitHub
parent c88510083b
commit 234860399b
14 changed files with 95 additions and 761 deletions

View File

@@ -3,7 +3,6 @@
import contextlib
import os
import threading
import weakref
from collections.abc import Callable, Iterator
from dataclasses import dataclass
@@ -152,12 +151,11 @@ class CoreEngineProcManager:
finally:
# Kill other procs if not all are running.
if self.finished_procs():
self.shutdown()
self.close()
def shutdown(self, timeout: float | None = None) -> None:
"""Shutdown engine core processes with configurable timeout."""
if self._finalizer.detach() is not None:
shutdown(self.processes, timeout=timeout)
def close(self):
"""Shutdown all procs."""
self._finalizer()
def join_first(self):
"""Wait for any process to exit."""
@@ -175,33 +173,6 @@ class CoreEngineProcManager:
}
class SignalCallback:
"""Safely trigger a callback from signal handler context via a dedicated thread."""
def __init__(self, callback: Callable[[], None]):
self._callback = callback
self._event = threading.Event()
self._stopped = False
self._thread = threading.Thread(
target=self._run,
daemon=True,
name="signal-callback",
)
self._thread.start()
def _run(self):
self._event.wait()
if not self._stopped:
self._callback()
def trigger(self):
self._event.set()
def stop(self):
self._stopped = True
self._event.set()
@contextlib.contextmanager
def set_device_control_env_var(
vllm_config: VllmConfig, local_dp_rank: int
@@ -797,7 +768,7 @@ class CoreEngineActorManager:
def get_run_refs(self):
return self.run_refs
def shutdown(self, timeout: float | None = None) -> None:
def close(self):
import ray
for actor in self.local_engine_actors + self.remote_engine_actors: