[Kernel] Add FlashInfer MoE A2A Kernel (#36022)

Signed-off-by: wzhao18 <wzhao18.sz@gmail.com>
Signed-off-by: Leo Tian <lctian@nvidia.com>
Co-authored-by: wzhao18 <wzhao18.sz@gmail.com>
Co-authored-by: Stefano Castagnetta <scastagnetta@nvidia.com>
Co-authored-by: root <root@lyris0267.lyris.clusters.nvidia.com>
Author: leo-cf-tian
Date: 2026-03-16 02:45:32 -04:00
Committed by: GitHub
Parent: 2390d44209
Commit: 2754231ba3

19 changed files with 417 additions and 43 deletions


@@ -150,7 +150,7 @@ def has_flashinfer_comm() -> bool:
 @functools.cache
-def has_flashinfer_all2all() -> bool:
+def has_flashinfer_nvlink_two_sided() -> bool:
     """Return `True` if FlashInfer mnnvl all2all is available."""
     if not has_flashinfer_comm():
         return False
@@ -170,6 +170,14 @@ def has_flashinfer_all2all() -> bool:
     return True


+@functools.cache
+def has_flashinfer_nvlink_one_sided() -> bool:
+    """Return `True` if FlashInfer trtllm_moe_alltoall module is available."""
+    if not has_flashinfer_comm():
+        return False
+    return importlib.util.find_spec("flashinfer.comm.trtllm_moe_alltoall") is not None
+
+
 @functools.cache
 def has_flashinfer_moe() -> bool:
     """Return `True` if FlashInfer MoE module is available."""
@@ -766,7 +774,8 @@ __all__ = [
     "autotune",
     "has_flashinfer_moe",
     "has_flashinfer_comm",
-    "has_flashinfer_all2all",
+    "has_flashinfer_nvlink_two_sided",
+    "has_flashinfer_nvlink_one_sided",
     "has_flashinfer_cutlass_fused_moe",
     "has_flashinfer_cutedsl_grouped_gemm_nt_masked",
     "has_flashinfer_fp8_blockscale_gemm",