[MoE Refactor] Create MK for TRTLLM Kernels (#32564)
Signed-off-by: Robert Shaw <robshaw@redhat.com>
Signed-off-by: Robert Shaw <rshaw@neuralmagic.com>
Signed-off-by: Robert Shaw <robertgshaw2@gmail.com>
Co-authored-by: Robert Shaw <robshaw@redhat.com>
Co-authored-by: Robert Shaw <rshaw@neuralmagic.com>
```diff
@@ -66,7 +66,7 @@ class Config:
     quant_config: TestMoEQuantConfig | None
 
     prepare_finalize_type: mk.FusedMoEPrepareAndFinalize
-    fused_experts_type: mk.FusedMoEPermuteExpertsUnpermute
+    fused_experts_type: mk.FusedMoEExperts
 
     fused_moe_chunk_size: int | None
     world_size: int
```
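This hunk picks up the shorter name for the experts base class (`FusedMoEExperts`, previously `FusedMoEPermuteExpertsUnpermute`). For reference, a minimal sketch of the test `Config` after the change, assuming a dataclass form and an import alias `mk` for the modular-kernel module; only the field names and annotations come from the hunk above:

```python
# Sketch only: field names/annotations are from the hunk above; the dataclass
# form and the import path are assumptions for illustration.
from dataclasses import dataclass

import vllm.model_executor.layers.fused_moe.modular_kernel as mk  # assumed path

@dataclass
class Config:
    quant_config: "TestMoEQuantConfig | None"  # test-local quant config type
    prepare_finalize_type: mk.FusedMoEPrepareAndFinalize
    fused_experts_type: mk.FusedMoEExperts     # was mk.FusedMoEPermuteExpertsUnpermute
    fused_moe_chunk_size: int | None
    world_size: int
```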
```diff
@@ -566,7 +566,7 @@ def make_modular_kernel(
     config: Config,
     vllm_config: VllmConfig,
     quant_config: FusedMoEQuantConfig,
-) -> mk.FusedMoEModularKernel:
+) -> mk.FusedMoEKernel:
     def next_power_of_2(x):
         import math
 
@@ -613,7 +613,7 @@ def make_modular_kernel(
         config.N,
     )
 
-    modular_kernel = mk.FusedMoEModularKernel(
+    modular_kernel = mk.FusedMoEKernel(
         prepare_finalize=prepare_finalize,
         fused_experts=fused_experts,
         inplace=False,
```
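Together, these two hunks switch the factory's return type and construction site to the renamed composed kernel. A minimal sketch of the construction, where `prepare_finalize` and `fused_experts` are hypothetical stand-ins for whichever concrete implementations the test instantiates, and the import path is an assumption:

```python
# Sketch only: constructor name and kwargs mirror the hunk above; the two
# component instances and the import path are assumptions.
import vllm.model_executor.layers.fused_moe.modular_kernel as mk

modular_kernel = mk.FusedMoEKernel(
    prepare_finalize=prepare_finalize,  # a mk.FusedMoEPrepareAndFinalize instance
    fused_experts=fused_experts,        # a mk.FusedMoEExperts instance
    inplace=False,                      # as in the test harness above
)
```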
```diff
@@ -667,6 +667,7 @@ def run_modular_kernel(
         "w2": rank_weights.w2,
         "topk_weights": rank_tensors.topk_weights,
         "topk_ids": topk_ids,
+        "activation": MoEActivation.SILU,
         "expert_map": rank_tensors.expert_map,
         "global_num_experts": config.E,
         "apply_router_weight_on_input": config.topk == 1
@@ -684,6 +685,6 @@ def run_modular_kernel(
         num_tokens=num_tokens,
         num_tokens_across_dp=num_tokens_across_dp,
     ):
-        out = mk.forward(**mk_kwargs)
+        out = mk.apply(**mk_kwargs)
 
     return out
```
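The final two hunks change the call side: the kwargs gain an explicit `activation` entry, and the entry point is now `apply` instead of `forward` (in `run_modular_kernel`, `mk` is the local kernel instance, not the module alias). A minimal sketch of the new call; the kwarg names and `.apply()` come from the hunks above, while every value is an illustrative placeholder:

```python
# Sketch only: kwarg names and .apply() mirror the hunks above; rank_weights,
# rank_tensors, topk_ids, config, and modular_kernel are placeholders.
mk_kwargs = {
    "w2": rank_weights.w2,
    "topk_weights": rank_tensors.topk_weights,
    "topk_ids": topk_ids,
    "activation": MoEActivation.SILU,               # newly passed explicitly
    "expert_map": rank_tensors.expert_map,
    "global_num_experts": config.E,
    "apply_router_weight_on_input": config.topk == 1,
}
out = modular_kernel.apply(**mk_kwargs)             # was modular_kernel.forward(...)
```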