[Bugfix] Strengthen the check of X-data-parallel-rank in Hybrid LB mode (#32314)
Signed-off-by: Tianchen Ding <dtcccc@linux.alibaba.com>
This commit is contained in:
@@ -62,12 +62,10 @@ class DPCoordinator:
|
||||
assert dp_size > 1, "Coordinator only used for data parallel"
|
||||
|
||||
host = parallel_config.data_parallel_master_ip
|
||||
-external_lb = parallel_config.data_parallel_external_lb
|
||||
-hybrid_lb = parallel_config.data_parallel_hybrid_lb
|
||||
|
||||
# Assume coordinator is colocated with front-end procs when not in
|
||||
# either external or hybrid DP LB mode.
|
||||
-local_only = not (external_lb or hybrid_lb)
|
||||
+local_only = not parallel_config.local_engines_only
|
||||
front_publish_address = get_engine_client_zmq_addr(
|
||||
local_only=local_only, host=host
|
||||
)
|
||||
|
||||
@@ -507,12 +507,7 @@ class MPClient(EngineCoreClient):
|
||||
offline_mode = parallel_config.data_parallel_rank_local is not None
|
||||
# Client manages local+remote EngineCores in pure internal LB case.
|
||||
# Client manages local EngineCores in hybrid and external LB case.
|
||||
-local_engines_only = (
|
||||
-parallel_config.data_parallel_hybrid_lb
|
||||
-or parallel_config.data_parallel_external_lb
|
||||
-)
|
||||
-
|
||||
-num_ranks = dp_local_size if local_engines_only else dp_size
|
||||
+num_ranks = dp_local_size if parallel_config.local_engines_only else dp_size
|
||||
self.engine_ranks_managed = (
|
||||
[dp_rank] if offline_mode else list(range(dp_rank, dp_rank + num_ranks))
|
||||
)
|
||||
|
||||
@@ -458,13 +458,14 @@ class InputProcessor:
|
||||
self._validate_lora(lora_request)
|
||||
self._validate_params(params)
|
||||
|
||||
-data_parallel_size = self.vllm_config.parallel_config.data_parallel_size
|
||||
-if data_parallel_rank is not None and not (
|
||||
-0 <= data_parallel_rank < data_parallel_size
|
||||
-):
|
||||
+parallel_config = self.vllm_config.parallel_config
|
||||
+dp_size = parallel_config.data_parallel_size
|
||||
+dp_local_size = parallel_config.data_parallel_size_local
|
||||
+num_ranks = dp_local_size if parallel_config.local_engines_only else dp_size
|
||||
+if data_parallel_rank is not None and not (0 <= data_parallel_rank < num_ranks):
|
||||
raise ValueError(
|
||||
f"data_parallel_rank {data_parallel_rank} "
|
||||
-f"is out of range [0, {data_parallel_size})."
|
||||
+f"is out of range [0, {num_ranks})."
|
||||
)
|
||||
|
||||
if arrival_time is None:
|
||||
|
||||
@@ -787,10 +787,7 @@ def launch_core_engines(
|
||||
local_start_index = parallel_config.data_parallel_rank_local
|
||||
dp_rank = parallel_config.data_parallel_rank
|
||||
host = parallel_config.data_parallel_master_ip
|
||||
-local_engines_only = (
|
||||
-parallel_config.data_parallel_hybrid_lb
|
||||
-or parallel_config.data_parallel_external_lb
|
||||
-)
|
||||
+local_engines_only = parallel_config.local_engines_only
|
||||
|
||||
# In offline mode there is an LLM instance per DP rank and
|
||||
# one core engine per LLM, see
|
||||
|
||||
Reference in New Issue
Block a user