[MoE Refactor] Create MK for TRTLLM Kernels (#32564)

Signed-off-by: Robert Shaw <robshaw@redhat.com>
Signed-off-by: Robert Shaw <rshaw@neuralmagic.com>
Signed-off-by: Robert Shaw <robertgshaw2@gmail.com>
Co-authored-by: Robert Shaw <robshaw@redhat.com>
Co-authored-by: Robert Shaw <rshaw@neuralmagic.com>
Authored by Robert Shaw on 2026-03-03 13:39:50 -05:00, committed by GitHub
parent 881a6b011b
commit 97995f6376
77 changed files with 2575 additions and 2087 deletions


@@ -66,7 +66,7 @@ class Config:
     quant_config: TestMoEQuantConfig | None
     prepare_finalize_type: mk.FusedMoEPrepareAndFinalize
-    fused_experts_type: mk.FusedMoEPermuteExpertsUnpermute
+    fused_experts_type: mk.FusedMoEExperts
     fused_moe_chunk_size: int | None
     world_size: int
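
Note: the hunk above only renames the experts-stage type. As a quick orientation, a minimal sketch of building this test Config under the new name; MyPrepareFinalize and MyExperts are hypothetical placeholders, and the remaining Config fields (E, N, topk, ...) are omitted.

# Sketch only: MyPrepareFinalize and MyExperts stand in for concrete
# implementations of the two modular-kernel stages.
config = Config(
    quant_config=None,
    prepare_finalize_type=MyPrepareFinalize,  # subclass of mk.FusedMoEPrepareAndFinalize
    fused_experts_type=MyExperts,             # subclass of mk.FusedMoEExperts
                                              # (formerly mk.FusedMoEPermuteExpertsUnpermute)
    fused_moe_chunk_size=None,
    world_size=1,
    # ... remaining fields omitted for brevity
)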
@@ -566,7 +566,7 @@ def make_modular_kernel(
     config: Config,
     vllm_config: VllmConfig,
     quant_config: FusedMoEQuantConfig,
-) -> mk.FusedMoEModularKernel:
+) -> mk.FusedMoEKernel:
     def next_power_of_2(x):
         import math
@@ -613,7 +613,7 @@ def make_modular_kernel(
         config.N,
     )
-    modular_kernel = mk.FusedMoEModularKernel(
+    modular_kernel = mk.FusedMoEKernel(
         prepare_finalize=prepare_finalize,
         fused_experts=fused_experts,
         inplace=False,
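
Note: for context, a minimal sketch of assembling the renamed kernel. The import path is an assumption about where the mk namespace lives; the constructor arguments mirror the hunk above.

# Assumed module path for the modular-kernel namespace (mk).
import vllm.model_executor.layers.fused_moe.modular_kernel as mk

def build_kernel(prepare_finalize, fused_experts):
    # FusedMoEKernel (formerly FusedMoEModularKernel) composes a
    # FusedMoEPrepareAndFinalize stage with a FusedMoEExperts stage.
    return mk.FusedMoEKernel(
        prepare_finalize=prepare_finalize,
        fused_experts=fused_experts,
        inplace=False,
    )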
@@ -667,6 +667,7 @@ def run_modular_kernel(
         "w2": rank_weights.w2,
         "topk_weights": rank_tensors.topk_weights,
         "topk_ids": topk_ids,
+        "activation": MoEActivation.SILU,
         "expert_map": rank_tensors.expert_map,
         "global_num_experts": config.E,
         "apply_router_weight_on_input": config.topk == 1
@@ -684,6 +685,6 @@ def run_modular_kernel(
         num_tokens=num_tokens,
         num_tokens_across_dp=num_tokens_across_dp,
     ):
-        out = mk.forward(**mk_kwargs)
+        out = mk.apply(**mk_kwargs)
     return out
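
Note: putting the pieces together, a hedged sketch of the renamed entry point (forward becomes apply). hidden_states and w1 are assumed kwargs truncated out of the visible hunk; the remaining arguments mirror mk_kwargs above.

kernel = build_kernel(prepare_finalize, fused_experts)  # from the earlier sketch
out = kernel.apply(
    hidden_states=hidden_states,            # assumed; not visible in the hunk
    w1=rank_weights.w1,                     # assumed, by symmetry with w2
    w2=rank_weights.w2,
    topk_weights=rank_tensors.topk_weights,
    topk_ids=topk_ids,
    activation=MoEActivation.SILU,
    expert_map=rank_tensors.expert_map,
    global_num_experts=config.E,
)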