[MoE Refactor] Create MK for TRTLLM Kernels (#32564)

Signed-off-by: Robert Shaw <robshaw@redhat.com>
Signed-off-by: Robert Shaw <rshaw@neuralmagic.com>
Signed-off-by: Robert Shaw <robertgshaw2@gmail.com>
Co-authored-by: Robert Shaw <robshaw@redhat.com>
Co-authored-by: Robert Shaw <rshaw@neuralmagic.com>
2026-03-03 13:39:50 -05:00
parent 881a6b011b
commit 97995f6376
77 changed files with 2575 additions and 2087 deletions
--- a/tests/kernels/moe/test_moe.py
+++ b/tests/kernels/moe/test_moe.py
@@ -346,14 +346,16 @@ def test_fused_moe(
        expert_map: torch.Tensor | None = None,
    ) -> torch.Tensor:
        topk_weights, topk_ids, _ = fused_topk(a, score, topk, False)
-        return m_fused_moe_fn(
+        return m_fused_moe_fn.apply(
            a,
            w1,
            w2,
            topk_weights,
            topk_ids,
+            activation=MoEActivation.SILU,
            global_num_experts=global_num_experts,
            expert_map=expert_map,
+            apply_router_weight_on_input=False,
        )

    fused_moe_fn = functools.partial(fused_moe, renormalize=False)
@@ -500,14 +502,16 @@ def test_naive_block_assignment_moe(
        expert_map: torch.Tensor | None = None,
    ) -> torch.Tensor:
        topk_weights, topk_ids, _ = fused_topk(a, score, topk, False)
-        return m_fused_moe_fn(
+        return m_fused_moe_fn.apply(
            a,
            w1,
            w2,
            topk_weights,
            topk_ids,
+            activation=MoEActivation.SILU,
            global_num_experts=global_num_experts,
            expert_map=expert_map,
+            apply_router_weight_on_input=False,
        )

    fused_moe_fn = functools.partial(fused_moe, renormalize=False)