[BugFix] Support online dense model DP without overhead (#30739)

Signed-off-by: Nick Hill <nhill@redhat.com>
Signed-off-by: njhill <nickhill123@gmail.com>
This commit is contained in:
Nick Hill
2026-01-02 07:36:38 -08:00
committed by GitHub
parent 08f425bad1
commit bd877162eb
20 changed files with 345 additions and 146 deletions

View File

@@ -179,22 +179,20 @@ class Worker(WorkerBase):
self.cache_config.num_cpu_blocks = num_cpu_blocks
def init_device(self):
device = self.device_config.device
if isinstance(device, torch.device) and device.type == "cuda":
if self.device_config.device_type == "cuda":
# This env var set by Ray causes exceptions with graph building.
os.environ.pop("NCCL_ASYNC_ERROR_HANDLING", None)
parallel_config = self.parallel_config
if (
self.parallel_config.data_parallel_size > 1
and self.parallel_config.data_parallel_size_local > 0
and self.parallel_config.distributed_executor_backend
not in ["ray", "external_launcher"]
and self.vllm_config.parallel_config.data_parallel_backend != "ray"
and self.vllm_config.parallel_config.nnodes_within_dp == 1
parallel_config.distributed_executor_backend
not in ("ray", "external_launcher")
and parallel_config.data_parallel_backend != "ray"
and parallel_config.nnodes_within_dp == 1
):
# Use local DP rank if available, otherwise use global DP rank.
dp_local_rank = self.parallel_config.data_parallel_rank_local
if dp_local_rank is None:
dp_local_rank = self.parallel_config.data_parallel_rank
dp_local_rank = self.parallel_config.data_parallel_index
tp_pp_world_size = (
self.parallel_config.pipeline_parallel_size