[2/N] Elastic EP Milestone 2: Integrating NIXL-EP (#35627)

Signed-off-by: Itay Alroy <ialroy@nvidia.com>
Co-authored-by: Yongji Wu <wuyongji317@gmail.com>
Co-authored-by: Ron Tourgeman <rtourgeman@nvidia.com>
2026-03-13 15:25:33 +02:00
parent 82f836d976
commit d5af196c18
14 changed files with 635 additions and 11 deletions
--- a/vllm/config/parallel.py
+++ b/vllm/config/parallel.py
@@ -43,6 +43,7 @@ All2AllBackend = Literal[
    "deepep_high_throughput",
    "deepep_low_latency",
    "mori",
+    "nixl_ep",
    "allgather_reducescatter",
    "flashinfer_all2allv",
 ]
@@ -156,6 +157,7 @@ class ParallelConfig:
    - "deepep_high_throughput": Use deepep high-throughput kernels\n
    - "deepep_low_latency": Use deepep low-latency kernels\n
    - "mori": Use mori kernels\n
+    - "nixl_ep": Use nixl-ep kernels\n
    - "flashinfer_all2allv": Use flashinfer alltoallv kernels for mnnvl"""

    max_parallel_loading_workers: int | None = None
@@ -580,6 +582,7 @@ class ParallelConfig:
                "deepep_high_throughput",
                "deepep_low_latency",
                "mori",
+                "nixl_ep",
            )
            and self.enable_expert_parallel
            and self.tensor_parallel_size > 1