[MoE Refactor] Introduce MoERunner abstraction and move execution logic from FusedMoE to DefaultMoERunner (#32344)
Signed-off-by: Bill Nell <bnell@redhat.com>
This commit is contained in:
@@ -585,6 +585,7 @@ def make_modular_kernel(
|
||||
tp_size_=get_tensor_model_parallel_world_size(),
|
||||
pcp_size_=get_pcp_group().world_size,
|
||||
dp_size_=get_dp_group().world_size,
|
||||
sp_size_=1,
|
||||
vllm_parallel_config=vllm_config.parallel_config,
|
||||
)
|
||||
|
||||
@@ -594,6 +595,7 @@ def make_modular_kernel(
|
||||
hidden_dim=config.K,
|
||||
intermediate_size_per_partition=config.N,
|
||||
num_local_experts=config.num_local_experts,
|
||||
num_logical_experts=config.E,
|
||||
moe_parallel_config=moe_parallel_config,
|
||||
in_dtype=config.dtype,
|
||||
max_num_tokens=next_power_of_2(config.M),
|
||||
|
||||
Reference in New Issue
Block a user