[MoE Refactor] Create MK for TRTLLM Kernels (#32564)

Signed-off-by: Robert Shaw <robshaw@redhat.com>
Signed-off-by: Robert Shaw <rshaw@neuralmagic.com>
Signed-off-by: Robert Shaw <robertgshaw2@gmail.com>
Co-authored-by: Robert Shaw <robshaw@redhat.com>
Co-authored-by: Robert Shaw <rshaw@neuralmagic.com>
This commit is contained in:
Robert Shaw
2026-03-03 13:39:50 -05:00
committed by GitHub
parent 881a6b011b
commit 97995f6376
77 changed files with 2575 additions and 2087 deletions

View File

@@ -346,14 +346,16 @@ def test_fused_moe(
expert_map: torch.Tensor | None = None,
) -> torch.Tensor:
topk_weights, topk_ids, _ = fused_topk(a, score, topk, False)
return m_fused_moe_fn(
return m_fused_moe_fn.apply(
a,
w1,
w2,
topk_weights,
topk_ids,
activation=MoEActivation.SILU,
global_num_experts=global_num_experts,
expert_map=expert_map,
apply_router_weight_on_input=False,
)
fused_moe_fn = functools.partial(fused_moe, renormalize=False)
@@ -500,14 +502,16 @@ def test_naive_block_assignment_moe(
expert_map: torch.Tensor | None = None,
) -> torch.Tensor:
topk_weights, topk_ids, _ = fused_topk(a, score, topk, False)
return m_fused_moe_fn(
return m_fused_moe_fn.apply(
a,
w1,
w2,
topk_weights,
topk_ids,
activation=MoEActivation.SILU,
global_num_experts=global_num_experts,
expert_map=expert_map,
apply_router_weight_on_input=False,
)
fused_moe_fn = functools.partial(fused_moe, renormalize=False)