[EPLB] Enforce sync eplb for NCCL-based all2all backend (#35212)

Signed-off-by: ilmarkov <markovilya197@gmail.com>
This commit is contained in:
Ilya Markov
2026-02-28 06:47:12 +01:00
committed by GitHub
parent 1d5ab5d603
commit b2d8b422b2

View File

@@ -774,6 +774,17 @@ class ParallelConfig:
"backend is mp, uni or external_launcher."
)
# Guard: async EPLB cannot be combined with the all2all backends listed
# below. Per the warning text, that combination causes hangs, so instead of
# raising we fall back to synchronous EPLB and tell the user why.
if (
self.all2all_backend in ("allgather_reducescatter", "naive")
and self.eplb_config.use_async
):
logger.warning(
"Async EPLB causes hangs with the '%s' all2all backend. "
"Forcing synchronous EPLB.",
self.all2all_backend,
)
# Override the configured value in place; this only runs when use_async
# was truthy, so the warning is emitted only when something changes.
self.eplb_config.use_async = False
@property
def use_ray(self) -> bool:
return self.distributed_executor_backend == "ray" or (