[Core] Simplify the DP padding/should-ubatch coordination logic (#25768)

Signed-off-by: Sage Moore <sage@neuralmagic.com>
Signed-off-by: mgoin <mgoin64@gmail.com>
Co-authored-by: mgoin <mgoin64@gmail.com>
This commit is contained in:
Sage Moore
2025-10-06 18:57:49 -07:00
committed by GitHub
parent c50901f3b9
commit 2111b4643c
10 changed files with 297 additions and 462 deletions

View File

@@ -365,6 +365,9 @@ class EngineArgs:
enable_dbo: bool = ParallelConfig.enable_dbo
dbo_decode_token_threshold: int = ParallelConfig.dbo_decode_token_threshold
dbo_prefill_token_threshold: int = ParallelConfig.dbo_prefill_token_threshold
disable_nccl_for_dp_synchronization: bool = (
ParallelConfig.disable_nccl_for_dp_synchronization
)
eplb_config: EPLBConfig = get_field(ParallelConfig, "eplb_config")
enable_eplb: bool = ParallelConfig.enable_eplb
expert_placement_strategy: ExpertPlacementStrategy = (
@@ -760,6 +763,10 @@ class EngineArgs:
"--dbo-prefill-token-threshold",
**parallel_kwargs["dbo_prefill_token_threshold"],
)
parallel_group.add_argument(
"--disable-nccl-for-dp-synchronization",
**parallel_kwargs["disable_nccl_for_dp_synchronization"],
)
parallel_group.add_argument("--enable-eplb", **parallel_kwargs["enable_eplb"])
parallel_group.add_argument("--eplb-config", **parallel_kwargs["eplb_config"])
parallel_group.add_argument(
@@ -1437,6 +1444,7 @@ class EngineArgs:
enable_dbo=self.enable_dbo,
dbo_decode_token_threshold=self.dbo_decode_token_threshold,
dbo_prefill_token_threshold=self.dbo_prefill_token_threshold,
disable_nccl_for_dp_synchronization=self.disable_nccl_for_dp_synchronization,
enable_eplb=self.enable_eplb,
eplb_config=self.eplb_config,
expert_placement_strategy=self.expert_placement_strategy,