[MoE Refactor] Create MK for TRTLLM Kernels (#32564)

Signed-off-by: Robert Shaw <robshaw@redhat.com>
Signed-off-by: Robert Shaw <rshaw@neuralmagic.com>
Signed-off-by: Robert Shaw <robertgshaw2@gmail.com>
Co-authored-by: Robert Shaw <robshaw@redhat.com>
Co-authored-by: Robert Shaw <rshaw@neuralmagic.com>
This commit is contained in:
Robert Shaw
2026-03-03 13:39:50 -05:00
committed by GitHub
parent 881a6b011b
commit 97995f6376
77 changed files with 2575 additions and 2087 deletions

View File

@@ -168,7 +168,6 @@ FUSED_MOE_CHUNK_SIZEs = [None, 16]
def is_nyi_config(config: Config) -> bool:
# We know these configs to be legitimate, but they still fail.
info = expert_info(config.fused_experts_type)
if info.needs_matching_quant:
# The triton kernels expect both per-act-token-quant and
# per-out-ch-quant or neither.
@@ -259,7 +258,7 @@ def test_modular_kernel_combinations_multigpu(
dtype: torch.dtype,
quant_config: TestMoEQuantConfig | None,
prepare_finalize_type: mk.FusedMoEPrepareAndFinalize,
fused_experts_type: mk.FusedMoEPermuteExpertsUnpermute,
fused_experts_type: mk.FusedMoEExperts,
chunk_size: int | None,
world_size: int,
pytestconfig,
@@ -301,7 +300,7 @@ def test_modular_kernel_combinations_singlegpu(
dtype: torch.dtype,
quant_config: TestMoEQuantConfig | None,
prepare_finalize_type: mk.FusedMoEPrepareAndFinalize,
fused_experts_type: mk.FusedMoEPermuteExpertsUnpermute,
fused_experts_type: mk.FusedMoEExperts,
chunk_size: int | None,
world_size: int,
pytestconfig,