[MoE Refactor] Create MK for TRTLLM Kernels (#32564)

Signed-off-by: Robert Shaw <robshaw@redhat.com>
Signed-off-by: Robert Shaw <rshaw@neuralmagic.com>
Signed-off-by: Robert Shaw <robertgshaw2@gmail.com>
Co-authored-by: Robert Shaw <robshaw@redhat.com>
Co-authored-by: Robert Shaw <rshaw@neuralmagic.com>
Authored by Robert Shaw on 2026-03-03 13:39:50 -05:00, committed by GitHub
parent 881a6b011b
commit 97995f6376
77 changed files with 2575 additions and 2087 deletions


@@ -66,7 +66,7 @@ class Config:
     quant_config: TestMoEQuantConfig | None
     prepare_finalize_type: mk.FusedMoEPrepareAndFinalize
-    fused_experts_type: mk.FusedMoEPermuteExpertsUnpermute
+    fused_experts_type: mk.FusedMoEExperts
     fused_moe_chunk_size: int | None
     world_size: int
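
Note: the hunk above only renames the experts-stage type. As a quick orientation, a minimal sketch of building this test Config under the new name; MyPrepareFinalize and MyExperts are hypothetical placeholders, and the remaining Config fields (E, N, topk, ...) are omitted.

# Sketch only: MyPrepareFinalize and MyExperts stand in for concrete
# implementations of the two modular-kernel stages.
config = Config(
    quant_config=None,
    prepare_finalize_type=MyPrepareFinalize,  # subclass of mk.FusedMoEPrepareAndFinalize
    fused_experts_type=MyExperts,             # subclass of mk.FusedMoEExperts
                                              # (formerly mk.FusedMoEPermuteExpertsUnpermute)
    fused_moe_chunk_size=None,
    world_size=1,
    # ... remaining fields omitted for brevity
)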
@@ -566,7 +566,7 @@ def make_modular_kernel(
     config: Config,
     vllm_config: VllmConfig,
     quant_config: FusedMoEQuantConfig,
-) -> mk.FusedMoEModularKernel:
+) -> mk.FusedMoEKernel:
     def next_power_of_2(x):
         import math
@@ -613,7 +613,7 @@ def make_modular_kernel(
         config.N,
     )
-    modular_kernel = mk.FusedMoEModularKernel(
+    modular_kernel = mk.FusedMoEKernel(
         prepare_finalize=prepare_finalize,
         fused_experts=fused_experts,
         inplace=False,
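
Note: for context, a minimal sketch of assembling the renamed kernel. The import path is an assumption about where the mk namespace lives; the constructor arguments mirror the hunk above.

# Assumed module path for the modular-kernel namespace (mk).
import vllm.model_executor.layers.fused_moe.modular_kernel as mk

def build_kernel(prepare_finalize, fused_experts):
    # FusedMoEKernel (formerly FusedMoEModularKernel) composes a
    # FusedMoEPrepareAndFinalize stage with a FusedMoEExperts stage.
    return mk.FusedMoEKernel(
        prepare_finalize=prepare_finalize,
        fused_experts=fused_experts,
        inplace=False,
    )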
@@ -667,6 +667,7 @@ def run_modular_kernel(
         "w2": rank_weights.w2,
         "topk_weights": rank_tensors.topk_weights,
         "topk_ids": topk_ids,
+        "activation": MoEActivation.SILU,
         "expert_map": rank_tensors.expert_map,
         "global_num_experts": config.E,
         "apply_router_weight_on_input": config.topk == 1
@@ -684,6 +685,6 @@ def run_modular_kernel(
         num_tokens=num_tokens,
         num_tokens_across_dp=num_tokens_across_dp,
     ):
-        out = mk.forward(**mk_kwargs)
+        out = mk.apply(**mk_kwargs)
     return out
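
Note: putting the pieces together, a hedged sketch of the renamed entry point (forward becomes apply). hidden_states and w1 are assumed kwargs truncated out of the visible hunk; the remaining arguments mirror mk_kwargs above.

kernel = build_kernel(prepare_finalize, fused_experts)  # from the earlier sketch
out = kernel.apply(
    hidden_states=hidden_states,            # assumed; not visible in the hunk
    w1=rank_weights.w1,                     # assumed, by symmetry with w2
    w2=rank_weights.w2,
    topk_weights=rank_tensors.topk_weights,
    topk_ids=topk_ids,
    activation=MoEActivation.SILU,
    expert_map=rank_tensors.expert_map,
    global_num_experts=config.E,
)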