[MoE Refactor] Introduce MoERunner abstraction and move execution logic from FusedMoE to DefaultMoERunner (#32344)

Signed-off-by: Bill Nell <bnell@redhat.com>
This commit is contained in:
bnellnm
2026-02-10 19:51:07 -05:00
committed by GitHub
parent dc6de33c3d
commit d1481ba783
25 changed files with 913 additions and 753 deletions

View File

@@ -585,6 +585,7 @@ def make_modular_kernel(
tp_size_=get_tensor_model_parallel_world_size(),
pcp_size_=get_pcp_group().world_size,
dp_size_=get_dp_group().world_size,
sp_size_=1,
vllm_parallel_config=vllm_config.parallel_config,
)
@@ -594,6 +595,7 @@ def make_modular_kernel(
hidden_dim=config.K,
intermediate_size_per_partition=config.N,
num_local_experts=config.num_local_experts,
num_logical_experts=config.E,
moe_parallel_config=moe_parallel_config,
in_dtype=config.dtype,
max_num_tokens=next_power_of_2(config.M),