[core] set up data parallel communication (#13591)

Signed-off-by: youkaichao <youkaichao@gmail.com>
This commit is contained in:
youkaichao
2025-02-22 19:28:59 +08:00
committed by GitHub
parent 7f6bae561c
commit 3e472d882a
17 changed files with 416 additions and 28 deletions

View File

@@ -501,6 +501,24 @@ def get_open_zmq_ipc_path() -> str:
def get_open_port() -> int:
    """
    Return an open port for the vLLM process to listen on.

    When running data parallel (signalled by ``VLLM_DP_MASTER_PORT`` being
    present in the environment), ports that may be used by the data
    parallel master process must be avoided. Right now 10 ports, starting
    at the master port, are reserved for the data parallel master process;
    currently it uses 2 of them.

    Returns:
        A port obtained from ``_get_open_port()``. In data parallel mode
        the port lies outside the reserved window
        ``[VLLM_DP_MASTER_PORT, VLLM_DP_MASTER_PORT + 10)``.
    """
    # Without data parallelism there is no reserved window to steer around.
    if "VLLM_DP_MASTER_PORT" not in os.environ:
        return _get_open_port()

    reserved_start = envs.VLLM_DP_MASTER_PORT
    reserved_stop = reserved_start + 10  # exclusive upper bound

    # Keep asking for a port until one lands outside the reserved window.
    candidate = _get_open_port()
    while reserved_start <= candidate < reserved_stop:
        candidate = _get_open_port()
    return candidate
def _get_open_port() -> int:
port = envs.VLLM_PORT
if port is not None:
while True: