[UX] Replace VLLM_ALL2ALL_BACKEND with --all2all-backend (#26732)

Signed-off-by: mgoin <mgoin64@gmail.com>
This commit is contained in:
Michael Goin
2025-10-13 21:12:52 -04:00
committed by GitHub
parent 8317f72354
commit 3e051bda82
12 changed files with 90 additions and 51 deletions

View File

@@ -641,6 +641,7 @@ class FusedMoEParallelConfig:
ep_rank: int
use_ep: bool # whether to use EP or not
all2all_backend: str # all2all backend for MoE communication
@property
def use_all2all_kernels(self):
@@ -648,21 +649,18 @@ class FusedMoEParallelConfig:
@property
def use_pplx_kernels(self):
# Reports whether the "pplx" all2all backend is the one selected while
# all2all kernels are in use.
# NOTE(review): this is a diff rendering without +/- markers; the two return
# lines below look like the removed and the added version of one statement
# (per the commit title, the env var is replaced by a config field) — confirm
# against the real file.
# Old form: backend taken from envs.VLLM_ALL2ALL_BACKEND.
return self.use_all2all_kernels and envs.VLLM_ALL2ALL_BACKEND == "pplx"
# New form: backend taken from this config's own all2all_backend field.
return self.use_all2all_kernels and self.all2all_backend == "pplx"
@property
def use_deepep_ht_kernels(self):
# Reports whether the "deepep_high_throughput" all2all backend is selected
# while all2all kernels are in use.
# NOTE(review): diff rendering without +/- markers; the two `and ...` lines
# below look like the removed (env var) and added (config field) versions of
# the same condition rather than two conditions that must both hold — confirm
# against the real file.
return (
self.use_all2all_kernels
# Old form: compares the envs.VLLM_ALL2ALL_BACKEND setting.
and envs.VLLM_ALL2ALL_BACKEND == "deepep_high_throughput"
# New form: compares this config's all2all_backend field.
and self.all2all_backend == "deepep_high_throughput"
)
@property
def use_deepep_ll_kernels(self):
# Reports whether the "deepep_low_latency" all2all backend is selected while
# all2all kernels are in use.
# NOTE(review): diff rendering without +/- markers; the parenthesised return
# looks like the removed version and the single-line return after it like the
# added version — confirm against the real file.
# Old form: compares the envs.VLLM_ALL2ALL_BACKEND setting.
return (
self.use_all2all_kernels
and envs.VLLM_ALL2ALL_BACKEND == "deepep_low_latency"
)
# New form: compares this config's all2all_backend field.
return self.use_all2all_kernels and self.all2all_backend == "deepep_low_latency"
@staticmethod
def make(
@@ -762,6 +760,7 @@ class FusedMoEParallelConfig:
ep_size=1,
ep_rank=0,
use_ep=False,
all2all_backend=vllm_parallel_config.all2all_backend,
)
# DP + EP / TP + EP / DP + TP + EP
assert use_ep
@@ -777,6 +776,7 @@ class FusedMoEParallelConfig:
ep_size=ep_size,
ep_rank=ep_rank,
use_ep=True,
all2all_backend=vllm_parallel_config.all2all_backend,
)

View File

@@ -58,7 +58,7 @@ def build_flashinfer_fp4_cutlass_moe_prepare_finalize(
) -> mk.FusedMoEPrepareAndFinalize:
"""Create a FlashInfer CUTLASS fused-MoE prepare finalize kernel"""
use_dp = moe.moe_parallel_config.dp_size > 1
enable_alltoallv = envs.VLLM_ALL2ALL_BACKEND == "flashinfer_all2allv"
enable_alltoallv = moe.moe_parallel_config.all2all_backend == "flashinfer_all2allv"
return create_flashinfer_prepare_finalize(
use_dp=use_dp, use_nvfp4=True, enable_alltoallv=enable_alltoallv
)