From b2d8b422b2014b23c44fea703c70331eef35e7a1 Mon Sep 17 00:00:00 2001 From: Ilya Markov Date: Sat, 28 Feb 2026 06:47:12 +0100 Subject: [PATCH] [EPLB] Enforce sync eplb for NCCL-based all2all backend (#35212) Signed-off-by: ilmarkov --- vllm/config/parallel.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/vllm/config/parallel.py b/vllm/config/parallel.py index 59df4a214..6e84cf16b 100644 --- a/vllm/config/parallel.py +++ b/vllm/config/parallel.py @@ -774,6 +774,17 @@ class ParallelConfig: "backend is mp, uni or external_launcher." ) + if ( + self.all2all_backend in ("allgather_reducescatter", "naive") + and self.eplb_config.use_async + ): + logger.warning( + "Async EPLB causes hangs with the '%s' all2all backend. " + "Forcing synchronous EPLB.", + self.all2all_backend, + ) + self.eplb_config.use_async = False + @property def use_ray(self) -> bool: return self.distributed_executor_backend == "ray" or (