[Kernel] DeepGemm MoE : Integrate triton permute / unpermute kernels (#20903)

Signed-off-by: Varun Sundar Rabindranath <vsundarr@redhat.com> Co-authored-by: Varun Sundar Rabindranath <vsundarr@redhat.com>
2025-07-17 13:40:37 +05:30
parent fdc5b43d20
commit 11dfdf21bf
10 changed files with 490 additions and 58 deletions
--- a/vllm/model_executor/layers/fused_moe/cutlass_moe.py
+++ b/vllm/model_executor/layers/fused_moe/cutlass_moe.py
@@ -271,6 +271,7 @@ class CutlassExpertsFp8(mk.FusedMoEPermuteExpertsUnpermute):
        topk: int,
        global_num_experts: int,
        local_num_experts: int,
+        expert_tokens_meta: Optional[mk.ExpertTokensMetadata],
    ) -> tuple[tuple[int, ...], tuple[int, ...], tuple[int, ...], torch.dtype]:
        workspace1: tuple[int, ...] = ()
        workspace2: tuple[int, ...] = ()