[Kernel] Add FlashInfer MoE A2A Kernel (#36022)
Signed-off-by: wzhao18 <wzhao18.sz@gmail.com>
Signed-off-by: Leo Tian <lctian@nvidia.com>
Co-authored-by: wzhao18 <wzhao18.sz@gmail.com>
Co-authored-by: Stefano Castagnetta <scastagnetta@nvidia.com>
Co-authored-by: root <root@lyris0267.lyris.clusters.nvidia.com>
This commit is contained in:
@@ -150,7 +150,7 @@ def has_flashinfer_comm() -> bool:
|
||||
|
||||
|
||||
@functools.cache
|
||||
def has_flashinfer_all2all() -> bool:
|
||||
def has_flashinfer_nvlink_two_sided() -> bool:
|
||||
"""Return `True` if FlashInfer mnnvl all2all is available."""
|
||||
if not has_flashinfer_comm():
|
||||
return False
|
||||
@@ -170,6 +170,14 @@ def has_flashinfer_all2all() -> bool:
|
||||
return True
|
||||
|
||||
|
||||
@functools.cache
def has_flashinfer_nvlink_one_sided() -> bool:
    """Return `True` if FlashInfer trtllm_moe_alltoall module is available."""
    # FlashInfer's comm package must be importable before we probe for the
    # one-sided all-to-all submodule.
    if has_flashinfer_comm():
        spec = importlib.util.find_spec("flashinfer.comm.trtllm_moe_alltoall")
        return spec is not None
    return False
|
||||
|
||||
|
||||
@functools.cache
|
||||
def has_flashinfer_moe() -> bool:
|
||||
"""Return `True` if FlashInfer MoE module is available."""
|
||||
@@ -766,7 +774,8 @@ __all__ = [
|
||||
"autotune",
|
||||
"has_flashinfer_moe",
|
||||
"has_flashinfer_comm",
|
||||
"has_flashinfer_all2all",
|
||||
"has_flashinfer_nvlink_two_sided",
|
||||
"has_flashinfer_nvlink_one_sided",
|
||||
"has_flashinfer_cutlass_fused_moe",
|
||||
"has_flashinfer_cutedsl_grouped_gemm_nt_masked",
|
||||
"has_flashinfer_fp8_blockscale_gemm",
|
||||
|
||||
Reference in New Issue
Block a user