[DP] Internal Load Balancing Per Node [one-pod-per-node] (#21238)
Signed-off-by: Robert Shaw <robshaw@redhat.com>
Signed-off-by: Nick Hill <nhill@redhat.com>
Signed-off-by: Tyler Michael Smith <tyler@neuralmagic.com>
Co-authored-by: Robert Shaw <robshaw@redhat.com>
Co-authored-by: Nick Hill <nhill@redhat.com>
Co-authored-by: Tyler Michael Smith <tyler@neuralmagic.com>
This commit is contained in:
@@ -295,9 +295,11 @@ class EngineArgs:
|
||||
tensor_parallel_size: int = ParallelConfig.tensor_parallel_size
|
||||
data_parallel_size: int = ParallelConfig.data_parallel_size
|
||||
data_parallel_rank: Optional[int] = None
|
||||
data_parallel_start_rank: Optional[int] = None
|
||||
data_parallel_size_local: Optional[int] = None
|
||||
data_parallel_address: Optional[str] = None
|
||||
data_parallel_rpc_port: Optional[int] = None
|
||||
data_parallel_hybrid_lb: bool = False
|
||||
data_parallel_backend: str = ParallelConfig.data_parallel_backend
|
||||
enable_expert_parallel: bool = ParallelConfig.enable_expert_parallel
|
||||
enable_eplb: bool = ParallelConfig.enable_eplb
|
||||
@@ -604,6 +606,11 @@ class EngineArgs:
|
||||
type=int,
|
||||
help='Data parallel rank of this instance. '
|
||||
'When set, enables external load balancer mode.')
|
||||
parallel_group.add_argument('--data-parallel-start-rank',
|
||||
'-dpr',
|
||||
type=int,
|
||||
help='Starting data parallel rank '
|
||||
'for secondary nodes.')
|
||||
parallel_group.add_argument('--data-parallel-size-local',
|
||||
'-dpl',
|
||||
type=int,
|
||||
@@ -625,6 +632,9 @@ class EngineArgs:
|
||||
default='mp',
|
||||
help='Backend for data parallel, either '
|
||||
'"mp" or "ray".')
|
||||
parallel_group.add_argument(
|
||||
"--data-parallel-hybrid-lb",
|
||||
**parallel_kwargs["data_parallel_hybrid_lb"])
|
||||
parallel_group.add_argument(
|
||||
"--enable-expert-parallel",
|
||||
**parallel_kwargs["enable_expert_parallel"])
|
||||
@@ -972,6 +982,7 @@ class EngineArgs:
|
||||
def create_engine_config(
|
||||
self,
|
||||
usage_context: Optional[UsageContext] = None,
|
||||
headless: bool = False,
|
||||
) -> VllmConfig:
|
||||
"""
|
||||
Create the VllmConfig.
|
||||
@@ -1060,15 +1071,41 @@ class EngineArgs:
|
||||
# but we should not do this here.
|
||||
placement_group = ray.util.get_current_placement_group()
|
||||
|
||||
assert not headless or not self.data_parallel_hybrid_lb, (
|
||||
"data_parallel_hybrid_lb is not applicable in "
|
||||
"headless mode")
|
||||
|
||||
data_parallel_external_lb = self.data_parallel_rank is not None
|
||||
# Local DP size = 1, use pure-external LB.
|
||||
if data_parallel_external_lb:
|
||||
assert self.data_parallel_size_local in (1, None), (
|
||||
"data_parallel_size_local must be 1 when data_parallel_rank "
|
||||
"is set")
|
||||
data_parallel_size_local = 1
|
||||
# Use full external lb if we have local_size of 1.
|
||||
self.data_parallel_hybrid_lb = False
|
||||
elif self.data_parallel_size_local is not None:
|
||||
data_parallel_size_local = self.data_parallel_size_local
|
||||
|
||||
if self.data_parallel_start_rank and not headless:
|
||||
# Infer hybrid LB mode.
|
||||
self.data_parallel_hybrid_lb = True
|
||||
|
||||
if self.data_parallel_hybrid_lb and data_parallel_size_local == 1:
|
||||
# Use full external lb if we have local_size of 1.
|
||||
data_parallel_external_lb = True
|
||||
self.data_parallel_hybrid_lb = False
|
||||
|
||||
if data_parallel_size_local == self.data_parallel_size:
|
||||
# Disable hybrid LB mode if set for a single node
|
||||
self.data_parallel_hybrid_lb = False
|
||||
|
||||
self.data_parallel_rank = self.data_parallel_start_rank or 0
|
||||
else:
|
||||
assert not self.data_parallel_hybrid_lb, (
|
||||
"data_parallel_size_local must be set to use "
|
||||
"data_parallel_hybrid_lb.")
|
||||
|
||||
# Local DP size defaults to global DP size if not set.
|
||||
data_parallel_size_local = self.data_parallel_size
|
||||
|
||||
@@ -1125,6 +1162,7 @@ class EngineArgs:
|
||||
data_parallel_master_ip=data_parallel_address,
|
||||
data_parallel_rpc_port=data_parallel_rpc_port,
|
||||
data_parallel_backend=self.data_parallel_backend,
|
||||
data_parallel_hybrid_lb=self.data_parallel_hybrid_lb,
|
||||
enable_expert_parallel=self.enable_expert_parallel,
|
||||
enable_eplb=self.enable_eplb,
|
||||
num_redundant_experts=self.num_redundant_experts,
|
||||
|
||||
Reference in New Issue
Block a user