[misc][distributed] use 127.0.0.1 for single-node (#5619)
This commit is contained in:
@@ -10,7 +10,7 @@ from vllm.executor.multiproc_worker_utils import (ProcessWorkerWrapper,
 from vllm.logger import init_logger
 from vllm.sequence import ExecuteModelRequest, SamplerOutput
 from vllm.utils import (cuda_device_count_stateless,
-                        get_distributed_init_method, get_ip, get_open_port,
+                        get_distributed_init_method, get_open_port,
                         get_vllm_instance_id, make_async)

 logger = init_logger(__name__)

@@ -37,8 +37,11 @@ class MultiprocessingGPUExecutor(DistributedGPUExecutor):
         assert world_size <= cuda_device_count_stateless(), (
             "please set tensor_parallel_size to less than max local gpu count")

+        # Multiprocessing-based executor does not support multi-node setting.
+        # Since it only works for single node, we can use the loopback address
+        # 127.0.0.1 for communication.
         distributed_init_method = get_distributed_init_method(
-            get_ip(), get_open_port())
+            "127.0.0.1", get_open_port())

         if world_size == 1:
             self.workers = []
||||
Reference in New Issue
Block a user