[Perf] use cpu all reduce to avoid sync when async_scheduling & dp > 1 (#29311)
Signed-off-by: zhuhaoran <zhuhaoran.zhr@alibaba-inc.com>
This commit is contained in:
@@ -1570,6 +1570,12 @@ class EngineArgs:
|
|||||||
model_config.skip_tokenizer_init = True
|
model_config.skip_tokenizer_init = True
|
||||||
logger.info("Skipping tokenizer initialization for tokens-only mode.")
|
logger.info("Skipping tokenizer initialization for tokens-only mode.")
|
||||||
|
|
||||||
|
if self.async_scheduling and not self.disable_nccl_for_dp_synchronization:
|
||||||
|
logger.info(
|
||||||
|
"Disabling NCCL for DP synchronization when using async scheduling."
|
||||||
|
)
|
||||||
|
self.disable_nccl_for_dp_synchronization = True
|
||||||
|
|
||||||
# Forward the deprecated CLI args to the EPLB config.
|
# Forward the deprecated CLI args to the EPLB config.
|
||||||
if self.num_redundant_experts is not None:
|
if self.num_redundant_experts is not None:
|
||||||
self.eplb_config.num_redundant_experts = self.num_redundant_experts
|
self.eplb_config.num_redundant_experts = self.num_redundant_experts
|
||||||
|
|||||||
Reference in New Issue
Block a user