[2/N] Elastic EP Milestone 2: Integrating NIXL-EP (#35627)

Signed-off-by: Itay Alroy <ialroy@nvidia.com>
Co-authored-by: Yongji Wu <wuyongji317@gmail.com>
Co-authored-by: Ron Tourgeman <rtourgeman@nvidia.com>
This commit is contained in:
Itay Alroy
2026-03-13 15:25:33 +02:00
committed by GitHub
parent 82f836d976
commit d5af196c18
14 changed files with 635 additions and 11 deletions

View File

@@ -43,6 +43,7 @@ All2AllBackend = Literal[
"deepep_high_throughput",
"deepep_low_latency",
"mori",
"nixl_ep",
"allgather_reducescatter",
"flashinfer_all2allv",
]
@@ -156,6 +157,7 @@ class ParallelConfig:
- "deepep_high_throughput": Use deepep high-throughput kernels\n
- "deepep_low_latency": Use deepep low-latency kernels\n
- "mori": Use mori kernels\n
- "nixl_ep": Use nixl-ep kernels\n
- "flashinfer_all2allv": Use flashinfer alltoallv kernels for mnnvl"""
max_parallel_loading_workers: int | None = None
@@ -580,6 +582,7 @@ class ParallelConfig:
"deepep_high_throughput",
"deepep_low_latency",
"mori",
"nixl_ep",
)
and self.enable_expert_parallel
and self.tensor_parallel_size > 1