[Feat] Refactor for parallel_config in FusedMoEModularKernel (#30282)

Signed-off-by: yewentao256 <zhyanwentao@126.com>
Signed-off-by: Robert Shaw <robshaw@redhat.com>
Co-authored-by: Robert Shaw <robshaw@redhat.com>
Co-authored-by: Robert Shaw <114415538+robertgshaw2-redhat@users.noreply.github.com>
commit 3778673ea8 (parent b337647aa0)
Author: Wentao Ye
Date: 2025-12-14 23:21:36 -05:00
Committed by: GitHub
8 changed files with 32 additions and 27 deletions


@@ -460,7 +460,6 @@ def cutlass_moe_fp8(
     expert_map: torch.Tensor | None = None,
     apply_router_weight_on_input: bool = False,
     global_num_experts: int = -1,
-    parallel_config=None,
 ) -> torch.Tensor:
     """
     This function computes an a8w8-quantized Mixture of Experts (MoE) layer
@@ -538,7 +537,6 @@ def cutlass_moe_fp8(
             c_strides2=c_strides2,
             quant_config=quant_config,
         ),
-        parallel_config=parallel_config,
     )
     return fn(
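
The hunks above drop `parallel_config` both from the `cutlass_moe_fp8` signature and from the `FusedMoEModularKernel` construction inside it, which suggests the modular kernel now resolves its parallel configuration itself rather than having callers thread it through. Below is a minimal, self-contained sketch of that pattern under that assumption; every name in it (`ParallelConfig`, `ModularKernel`, `moe_entry_point`) is an illustrative stand-in, not vLLM's actual API.

```python
# A runnable sketch of the refactor pattern suggested by this diff: instead of
# threading parallel_config through each MoE entry point, the kernel owns it.
# All names here are illustrative stand-ins, not vLLM's real classes.
from dataclasses import dataclass


@dataclass
class ParallelConfig:
    """Stand-in for a parallel configuration (tensor/data parallel sizes)."""
    tp_size: int = 1
    dp_size: int = 1


class ModularKernel:
    """Stand-in for FusedMoEModularKernel: it carries the parallel config."""

    def __init__(self, parallel_config: ParallelConfig | None = None):
        # The kernel resolves its configuration once, at construction time.
        self.parallel_config = parallel_config or ParallelConfig()

    def __call__(self, x: list[float]) -> list[float]:
        # Dummy compute standing in for the fused CUTLASS MoE path.
        return [v * self.parallel_config.tp_size for v in x]


def moe_entry_point(x: list[float], kernel: ModularKernel) -> list[float]:
    # Note the absence of a parallel_config parameter: after the refactor,
    # the kernel object already knows it.
    return kernel(x)


if __name__ == "__main__":
    fn = ModularKernel(ParallelConfig(tp_size=2))
    print(moe_entry_point([1.0, 2.0], fn))  # -> [2.0, 4.0]
```

Consistent with this, the call sites in the diff delete the keyword argument outright rather than replacing it with anything else.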