[V1] [4/N] API Server: ZMQ/MP Utilities (#11541)

This commit is contained in:
Robert Shaw
2024-12-27 20:45:08 -05:00
committed by GitHub
parent a60731247f
commit df04dffade
12 changed files with 242 additions and 210 deletions

View File

@@ -17,13 +17,12 @@ from vllm.distributed import (destroy_distributed_environment,
from vllm.distributed.device_communicators.shm_broadcast import (Handle,
MessageQueue)
from vllm.executor.multiproc_worker_utils import (
_add_prefix, get_mp_context, set_multiprocessing_worker_envs)
_add_prefix, set_multiprocessing_worker_envs)
from vllm.logger import init_logger
from vllm.utils import (get_distributed_init_method, get_open_port,
get_open_zmq_ipc_path)
from vllm.utils import (get_distributed_init_method, get_mp_context,
get_open_port, get_open_zmq_ipc_path, zmq_socket_ctx)
from vllm.v1.executor.abstract import Executor
from vllm.v1.outputs import ModelRunnerOutput
from vllm.v1.utils import make_zmq_socket
from vllm.worker.worker_base import WorkerWrapperBase
logger = init_logger(__name__)
@@ -250,7 +249,7 @@ class WorkerProc:
worker_response_mq_handle = self.worker_response_mq.export_handle()
# Send Readiness signal to EngineCore process.
with make_zmq_socket(ready_path, zmq.constants.PUSH) as ready_socket:
with zmq_socket_ctx(ready_path, zmq.constants.PUSH) as ready_socket:
payload = pickle.dumps(worker_response_mq_handle,
protocol=pickle.HIGHEST_PROTOCOL)
ready_socket.send_string(WorkerProc.READY_STR)
@@ -352,7 +351,7 @@ class WorkerProc:
ready_path: str,
) -> Optional[Handle]:
"""Wait until the Worker is ready."""
with make_zmq_socket(ready_path, zmq.constants.PULL) as socket:
with zmq_socket_ctx(ready_path, zmq.constants.PULL) as socket:
# Wait for Worker to send READY.
while socket.poll(timeout=POLLING_TIMEOUT_MS) == 0: