[V1][Frontend] Improve Shutdown And Logs (#11737)

Signed-off-by: rshaw@neuralmagic.com <rshaw@neuralmagic.com>
Signed-off-by: Andrew Feldman <afeldman@neuralmagic.com>
Signed-off-by: Nick Hill <nhill@redhat.com>
Co-authored-by: rshaw@neuralmagic.com <rshaw@neuralmagic.com>
Co-authored-by: Cyrus Leung <cyrus.tl.leung@gmail.com>
Co-authored-by: Russell Bryant <rbryant@redhat.com>
Co-authored-by: Andrew Feldman <afeldman@neuralmagic.com>
Co-authored-by: afeldman-nm <156691304+afeldman-nm@users.noreply.github.com>
Co-authored-by: Nick Hill <nhill@redhat.com>
This commit is contained in:
Robert Shaw
2025-04-16 22:48:34 -04:00
committed by GitHub
parent 3c776dcefb
commit 2b05b8ce69
16 changed files with 1031 additions and 347 deletions

View File

@@ -7,11 +7,13 @@ import time
from contextlib import contextmanager
from dataclasses import dataclass, field
from multiprocessing import shared_memory
from typing import List, Optional, Tuple, Union
from threading import Event
from typing import Any, List, Optional, Tuple, Union
from unittest.mock import patch
import torch
import torch.distributed as dist
import zmq
from torch.distributed import ProcessGroup
from zmq import IPV6 # type: ignore
from zmq import SUB, SUBSCRIBE, XPUB, XPUB_VERBOSE, Context # type: ignore
@@ -400,7 +402,9 @@ class MessageQueue:
break
@contextmanager
def acquire_read(self, timeout: Optional[float] = None):
def acquire_read(self,
timeout: Optional[float] = None,
cancel: Optional[Event] = None):
assert self._is_local_reader, "Only readers can acquire read"
start_time = time.monotonic()
n_warning = 1
@@ -430,6 +434,9 @@ class MessageQueue:
)
n_warning += 1
if cancel is not None and cancel.is_set():
raise RuntimeError("cancelled")
# if we time out, raise an exception
if (timeout is not None
and time.monotonic() - start_time > timeout):
@@ -464,10 +471,12 @@ class MessageQueue:
if self.n_remote_reader > 0:
self.remote_socket.send(serialized_obj)
def dequeue(self, timeout: Optional[float] = None):
def dequeue(self,
timeout: Optional[float] = None,
cancel: Optional[Event] = None):
""" Read from message queue with optional timeout (in seconds) """
if self._is_local_reader:
with self.acquire_read(timeout) as buf:
with self.acquire_read(timeout, cancel) as buf:
overflow = buf[0] == 1
if not overflow:
# no need to know the size of serialized object
@@ -475,15 +484,21 @@ class MessageQueue:
# see https://docs.python.org/3/library/pickle.html
obj = pickle.loads(buf[1:])
if overflow:
recv = self.local_socket.recv()
obj = pickle.loads(recv)
obj = MessageQueue.recv(self.local_socket, timeout)
elif self._is_remote_reader:
recv = self.remote_socket.recv()
obj = pickle.loads(recv)
obj = MessageQueue.recv(self.remote_socket, timeout)
else:
raise RuntimeError("Only readers can dequeue")
return obj
@staticmethod
def recv(socket: zmq.Socket, timeout: Optional[float]) -> Any:
    """Receive one pickled object from ``socket``, waiting at most ``timeout``.

    Args:
        socket: The zmq socket to poll and read from.
        timeout: Maximum wait in seconds; ``None`` is forwarded to
            ``socket.poll`` unchanged as "no timeout given".

    Returns:
        The object obtained by unpickling the received frame.

    Raises:
        TimeoutError: If ``socket.poll`` reports nothing ready to read.
    """
    # poll() is given whole milliseconds; the fractional part is truncated.
    poll_ms = int(timeout * 1000) if timeout is not None else None
    ready = socket.poll(timeout=poll_ms)
    if not ready:
        raise TimeoutError
    # copy=False yields a frame object; unpickle directly from its buffer.
    frame = socket.recv(copy=False)
    return pickle.loads(frame.buffer)
def broadcast_object(self, obj=None):
if self._is_writer:
self.enqueue(obj)