[Kernel] Add FlashInfer MoE A2A Kernel (#36022)

Signed-off-by: wzhao18 <wzhao18.sz@gmail.com>
Signed-off-by: Leo Tian <lctian@nvidia.com>
Co-authored-by: wzhao18 <wzhao18.sz@gmail.com>
Co-authored-by: Stefano Castagnetta <scastagnetta@nvidia.com>
Co-authored-by: root <root@lyris0267.lyris.clusters.nvidia.com>
Author: leo-cf-tian
Date: 2026-03-16 02:45:32 -04:00
Committed by: GitHub
Parent: 2390d44209
Commit: 2754231ba3

19 changed files with 417 additions and 43 deletions


@@ -150,7 +150,7 @@ def has_flashinfer_comm() -> bool:
 @functools.cache
-def has_flashinfer_all2all() -> bool:
+def has_flashinfer_nvlink_two_sided() -> bool:
     """Return `True` if FlashInfer mnnvl all2all is available."""
     if not has_flashinfer_comm():
         return False
@@ -170,6 +170,14 @@ def has_flashinfer_all2all() -> bool:
     return True


+@functools.cache
+def has_flashinfer_nvlink_one_sided() -> bool:
+    """Return `True` if FlashInfer trtllm_moe_alltoall module is available."""
+    if not has_flashinfer_comm():
+        return False
+    return importlib.util.find_spec("flashinfer.comm.trtllm_moe_alltoall") is not None
+
+
 @functools.cache
 def has_flashinfer_moe() -> bool:
     """Return `True` if FlashInfer MoE module is available."""
@@ -766,7 +774,8 @@ __all__ = [
     "autotune",
     "has_flashinfer_moe",
     "has_flashinfer_comm",
-    "has_flashinfer_all2all",
+    "has_flashinfer_nvlink_two_sided",
+    "has_flashinfer_nvlink_one_sided",
     "has_flashinfer_cutlass_fused_moe",
     "has_flashinfer_cutedsl_grouped_gemm_nt_masked",
     "has_flashinfer_fp8_blockscale_gemm",