[V1][Frontend] Improve Shutdown And Logs (#11737)
Signed-off-by: rshaw@neuralmagic.com <rshaw@neuralmagic.com> Signed-off-by: Andrew Feldman <afeldman@neuralmagic.com> Signed-off-by: Nick Hill <nhill@redhat.com> Co-authored-by: rshaw@neuralmagic.com <rshaw@neuralmagic.com> Co-authored-by: Cyrus Leung <cyrus.tl.leung@gmail.com> Co-authored-by: Russell Bryant <rbryant@redhat.com> Co-authored-by: Andrew Feldman <afeldman@neuralmagic.com> Co-authored-by: afeldman-nm <156691304+afeldman-nm@users.noreply.github.com> Co-authored-by: Nick Hill <nhill@redhat.com>
This commit is contained in:
@@ -7,11 +7,13 @@ import time
|
||||
from contextlib import contextmanager
|
||||
from dataclasses import dataclass, field
|
||||
from multiprocessing import shared_memory
|
||||
from typing import List, Optional, Tuple, Union
|
||||
from threading import Event
|
||||
from typing import Any, List, Optional, Tuple, Union
|
||||
from unittest.mock import patch
|
||||
|
||||
import torch
|
||||
import torch.distributed as dist
|
||||
import zmq
|
||||
from torch.distributed import ProcessGroup
|
||||
from zmq import IPV6 # type: ignore
|
||||
from zmq import SUB, SUBSCRIBE, XPUB, XPUB_VERBOSE, Context # type: ignore
|
||||
@@ -400,7 +402,9 @@ class MessageQueue:
|
||||
break
|
||||
|
||||
@contextmanager
|
||||
def acquire_read(self, timeout: Optional[float] = None):
|
||||
def acquire_read(self,
|
||||
timeout: Optional[float] = None,
|
||||
cancel: Optional[Event] = None):
|
||||
assert self._is_local_reader, "Only readers can acquire read"
|
||||
start_time = time.monotonic()
|
||||
n_warning = 1
|
||||
@@ -430,6 +434,9 @@ class MessageQueue:
|
||||
)
|
||||
n_warning += 1
|
||||
|
||||
if cancel is not None and cancel.is_set():
|
||||
raise RuntimeError("cancelled")
|
||||
|
||||
# if we time out, raise an exception
|
||||
if (timeout is not None
|
||||
and time.monotonic() - start_time > timeout):
|
||||
@@ -464,10 +471,12 @@ class MessageQueue:
|
||||
if self.n_remote_reader > 0:
|
||||
self.remote_socket.send(serialized_obj)
|
||||
|
||||
def dequeue(self, timeout: Optional[float] = None):
|
||||
def dequeue(self,
|
||||
timeout: Optional[float] = None,
|
||||
cancel: Optional[Event] = None):
|
||||
""" Read from message queue with optional timeout (in seconds) """
|
||||
if self._is_local_reader:
|
||||
with self.acquire_read(timeout) as buf:
|
||||
with self.acquire_read(timeout, cancel) as buf:
|
||||
overflow = buf[0] == 1
|
||||
if not overflow:
|
||||
# no need to know the size of serialized object
|
||||
@@ -475,15 +484,21 @@ class MessageQueue:
|
||||
# see https://docs.python.org/3/library/pickle.html
|
||||
obj = pickle.loads(buf[1:])
|
||||
if overflow:
|
||||
recv = self.local_socket.recv()
|
||||
obj = pickle.loads(recv)
|
||||
obj = MessageQueue.recv(self.local_socket, timeout)
|
||||
elif self._is_remote_reader:
|
||||
recv = self.remote_socket.recv()
|
||||
obj = pickle.loads(recv)
|
||||
obj = MessageQueue.recv(self.remote_socket, timeout)
|
||||
else:
|
||||
raise RuntimeError("Only readers can dequeue")
|
||||
return obj
|
||||
|
||||
@staticmethod
def recv(socket: zmq.Socket, timeout: Optional[float]) -> Any:
    """Receive one pickled object from ``socket``, waiting at most ``timeout``.

    Args:
        socket: The ZMQ socket to poll and then read from.
        timeout: Maximum time to wait, in seconds. ``None`` waits
            indefinitely. Negative values are treated as zero, i.e. a
            single non-blocking readiness check.

    Returns:
        The object obtained by unpickling the received message.

    Raises:
        TimeoutError: If the socket does not become readable within
            ``timeout``.
    """
    if timeout is None:
        timeout_ms = None
    else:
        # Clamp at zero: pyzmq interprets a negative poll timeout as
        # "wait forever", so an already-expired deadline must not turn
        # into an unbounded block.
        timeout_ms = max(0, int(timeout * 1000))

    if not socket.poll(timeout=timeout_ms):
        raise TimeoutError(
            f"no message received within {timeout} seconds")

    # copy=False yields a frame whose buffer is handed to pickle directly,
    # avoiding an intermediate bytes copy of the payload.
    frame = socket.recv(copy=False)
    # NOTE(review): pickle.loads can execute arbitrary code; this is only
    # safe if every peer connected to this socket is trusted -- confirm.
    return pickle.loads(frame.buffer)
|
||||
|
||||
def broadcast_object(self, obj=None):
|
||||
if self._is_writer:
|
||||
self.enqueue(obj)
|
||||
|
||||
Reference in New Issue
Block a user