[Bugfix] Strengthen the check of X-data-parallel-rank in Hybrid LB mode (#32314)

Signed-off-by: Tianchen Ding <dtcccc@linux.alibaba.com>
This commit is contained in:
dtc
2026-01-15 16:31:16 +08:00
committed by GitHub
parent 4c1c501a7e
commit 1e584823f8
7 changed files with 20 additions and 22 deletions

View File

@@ -62,12 +62,10 @@ class DPCoordinator:
assert dp_size > 1, "Coordinator only used for data parallel"
host = parallel_config.data_parallel_master_ip
-external_lb = parallel_config.data_parallel_external_lb
-hybrid_lb = parallel_config.data_parallel_hybrid_lb
# Assume coordinator is colocated with front-end procs when not in
# either external or hybrid DP LB mode.
-local_only = not (external_lb or hybrid_lb)
+local_only = not parallel_config.local_engines_only
front_publish_address = get_engine_client_zmq_addr(
local_only=local_only, host=host
)

View File

@@ -507,12 +507,7 @@ class MPClient(EngineCoreClient):
offline_mode = parallel_config.data_parallel_rank_local is not None
# Client manages local+remote EngineCores in pure internal LB case.
# Client manages local EngineCores in hybrid and external LB case.
-local_engines_only = (
-parallel_config.data_parallel_hybrid_lb
-or parallel_config.data_parallel_external_lb
-)
-num_ranks = dp_local_size if local_engines_only else dp_size
+num_ranks = dp_local_size if parallel_config.local_engines_only else dp_size
self.engine_ranks_managed = (
[dp_rank] if offline_mode else list(range(dp_rank, dp_rank + num_ranks))
)

View File

@@ -458,13 +458,14 @@ class InputProcessor:
self._validate_lora(lora_request)
self._validate_params(params)
-data_parallel_size = self.vllm_config.parallel_config.data_parallel_size
-if data_parallel_rank is not None and not (
-0 <= data_parallel_rank < data_parallel_size
-):
+parallel_config = self.vllm_config.parallel_config
+dp_size = parallel_config.data_parallel_size
+dp_local_size = parallel_config.data_parallel_size_local
+num_ranks = dp_local_size if parallel_config.local_engines_only else dp_size
+if data_parallel_rank is not None and not (0 <= data_parallel_rank < num_ranks):
raise ValueError(
f"data_parallel_rank {data_parallel_rank} "
-f"is out of range [0, {data_parallel_size})."
+f"is out of range [0, {num_ranks})."
)
if arrival_time is None:

View File

@@ -787,10 +787,7 @@ def launch_core_engines(
local_start_index = parallel_config.data_parallel_rank_local
dp_rank = parallel_config.data_parallel_rank
host = parallel_config.data_parallel_master_ip
-local_engines_only = (
-parallel_config.data_parallel_hybrid_lb
-or parallel_config.data_parallel_external_lb
-)
+local_engines_only = parallel_config.local_engines_only
# In offline mode there is an LLM instance per DP rank and
# one core engine per LLM, see