[BugFix] Support online dense model DP without overhead (#30739)

Signed-off-by: Nick Hill <nhill@redhat.com>
Signed-off-by: njhill <nickhill123@gmail.com>
Author: Nick Hill
Committed: 2026-01-02 07:36:38 -08:00 (by GitHub)
Parent: 08f425bad1
Commit: bd877162eb

20 changed files with 345 additions and 146 deletions


@@ -119,6 +119,8 @@ class ParallelConfig:
     between local data parallel ranks, but an external LB balances
     between vLLM nodes/replicas. Set explicitly in conjunction with
     --data-parallel-start-rank."""
+    is_moe_model: bool | None = None
+    """Whether the deployed model is MoE (if known)."""
     enable_expert_parallel: bool = False
     """Use expert parallelism instead of tensor parallelism for MoE layers."""
     enable_eplb: bool = False
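
This hunk adds a tri-state `is_moe_model` flag: `True` (known MoE), `False` (known dense), or `None` (unknown). The distinction matters because the validation later in this commit uses an identity test (`is False`), not truthiness. A minimal sketch, using a hypothetical stripped-down stand-in class rather than vLLM's real `ParallelConfig`:

```python
from dataclasses import dataclass


@dataclass
class DemoParallelConfig:
    """Hypothetical, stripped-down stand-in for vLLM's ParallelConfig."""

    data_parallel_size: int = 1
    # Tri-state: True = known MoE, False = known dense, None = unknown.
    is_moe_model: bool | None = None
    enable_expert_parallel: bool = False


cfg = DemoParallelConfig(data_parallel_size=2)
# `None` (unknown) must not be treated as dense, hence the identity check:
print(cfg.is_moe_model is False)  # False: unknown, so no dense-only error
print(DemoParallelConfig(is_moe_model=False).is_moe_model is False)  # True
```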
@@ -255,6 +257,10 @@ class ParallelConfig:
     Block_size should be divisible by cp_kv_cache_interleave_size.
     """
+    data_parallel_index: int = Field(init=False)
+    """Equal to the data parallel rank but not used for torch process groups
+    and not overridden for dense models."""
     _api_process_count: int = Field(default=1, gt=0)
     """
     The number of API processes initialized.
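
This hunk introduces `data_parallel_index`, which starts out equal to the data parallel rank but, per its docstring, is never fed to torch process groups and is not overridden for dense models. A sketch of the resulting rank/index split; the `resolve_dp_fields` helper and the rank-collapsing behavior are assumptions for illustration, not the commit's actual logic:

```python
def resolve_dp_fields(data_parallel_rank: int, is_moe_model: bool | None):
    """Hypothetical helper illustrating the rank/index split."""
    # The index always records the replica's position (per the docstring).
    data_parallel_index = data_parallel_rank
    if is_moe_model is False:
        # Assumption: a dense model needs no cross-rank MoE collectives,
        # so its torch-process-group DP rank can be treated as standalone.
        pg_rank = 0
    else:
        pg_rank = data_parallel_rank
    return data_parallel_index, pg_rank


print(resolve_dp_fields(3, is_moe_model=False))  # (3, 0)
print(resolve_dp_fields(3, is_moe_model=True))   # (3, 3)
```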
@@ -466,6 +472,7 @@ class ParallelConfig:
"data_parallel_rank",
"data_parallel_rank_local",
"data_parallel_size_local",
"data_parallel_index",
"data_parallel_backend",
"data_parallel_external_lb",
"data_parallel_hybrid_lb",
@@ -546,6 +553,14 @@ class ParallelConfig:
         self.data_parallel_master_ip = envs.VLLM_DP_MASTER_IP
         self.data_parallel_master_port = envs.VLLM_DP_MASTER_PORT
+        if self.data_parallel_size > 1 and self.is_moe_model is False:
+            raise ValueError(
+                "Offline data parallel mode is not supported/useful"
+                " for dense models."
+            )
+        self.data_parallel_index = self.data_parallel_rank
         if self.distributed_executor_backend == "external_launcher":
             os.environ["VLLM_ENABLE_V1_MULTIPROCESSING"] = "0"
             logger.info("Disabling V1 multiprocessing for external launcher.")
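
The final hunk adds the dense-model guard itself: offline DP is rejected only when the model is known dense (`is_moe_model is False`), while MoE (`True`) and unknown (`None`) pass through, after which `data_parallel_index` is seeded from the rank. A standalone sketch of the guard; the `check_offline_dp` function is hypothetical, though the message text mirrors the hunk:

```python
def check_offline_dp(data_parallel_size: int, is_moe_model: bool | None) -> None:
    """Hypothetical standalone version of the guard added above."""
    if data_parallel_size > 1 and is_moe_model is False:
        raise ValueError(
            "Offline data parallel mode is not supported/useful"
            " for dense models."
        )


check_offline_dp(4, is_moe_model=True)   # OK: MoE model
check_offline_dp(4, is_moe_model=None)   # OK: unknown, not assumed dense
try:
    check_offline_dp(4, is_moe_model=False)
except ValueError as e:
    print(e)  # Offline data parallel mode is not supported/useful for dense models.
```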