[Kernel] DeepGemm MoE: Integrate Triton permute / unpermute kernels (#20903)

Signed-off-by: Varun Sundar Rabindranath <vsundarr@redhat.com>
Co-authored-by: Varun Sundar Rabindranath <vsundarr@redhat.com>
This commit is contained in:
Varun Sundar Rabindranath
2025-07-17 13:40:37 +05:30
committed by GitHub
parent fdc5b43d20
commit 11dfdf21bf
10 changed files with 490 additions and 58 deletions

View File

@@ -271,6 +271,7 @@ class CutlassExpertsFp8(mk.FusedMoEPermuteExpertsUnpermute):
topk: int,
global_num_experts: int,
local_num_experts: int,
expert_tokens_meta: Optional[mk.ExpertTokensMetadata],
) -> tuple[tuple[int, ...], tuple[int, ...], tuple[int, ...], torch.dtype]:
workspace1: tuple[int, ...] = ()
workspace2: tuple[int, ...] = ()