[Feat] Refactor for parallel_config in FusedMoEModularKernel (#30282)
Signed-off-by: yewentao256 <zhyanwentao@126.com>
Signed-off-by: Robert Shaw <robshaw@redhat.com>
Co-authored-by: Robert Shaw <robshaw@redhat.com>
Co-authored-by: Robert Shaw <114415538+robertgshaw2-redhat@users.noreply.github.com>
@@ -460,7 +460,6 @@ def cutlass_moe_fp8(
    expert_map: torch.Tensor | None = None,
    apply_router_weight_on_input: bool = False,
    global_num_experts: int = -1,
    parallel_config=None,
) -> torch.Tensor:
    """
    This function computes a a8w8-quantized Mixture of Experts (MoE) layer
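The hunk above shows cutlass_moe_fp8 taking an optional parallel_config that defaults to None. Below is a minimal sketch of that defaulting pattern; SimpleParallelConfig and run_moe_layer are hypothetical stand-ins, not part of vLLM, and the field names are assumptions.

from dataclasses import dataclass

import torch


@dataclass
class SimpleParallelConfig:
    # Hypothetical stand-in for the real parallel_config object;
    # these field names are assumptions, not vLLM's actual attributes.
    world_size: int = 1
    rank: int = 0


def run_moe_layer(
    hidden_states: torch.Tensor,
    parallel_config: SimpleParallelConfig | None = None,
) -> torch.Tensor:
    # Mirror the `parallel_config=None` default seen in the diff: callers
    # that do not need expert parallelism can omit the argument and fall
    # back to a single-rank configuration.
    if parallel_config is None:
        parallel_config = SimpleParallelConfig()
    # Placeholder compute; a real kernel would dispatch tokens to experts
    # according to parallel_config.
    return hidden_states * parallel_config.world_size


# Both calls are valid because the config is optional.
x = torch.randn(4, 8)
run_moe_layer(x)
run_moe_layer(x, parallel_config=SimpleParallelConfig(world_size=2, rank=0))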
@@ -538,7 +537,6 @@ def cutlass_moe_fp8(
            c_strides2=c_strides2,
            quant_config=quant_config,
        ),
        parallel_config=parallel_config,
    )

    return fn(
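The second hunk passes parallel_config through to the kernel constructor alongside the experts object. A rough sketch of that threading pattern follows; ModularKernelSketch and ExpertsSketch are hypothetical stand-ins rather than vLLM's FusedMoEModularKernel and CutlassExpertsFp8, and the internal behavior is assumed for illustration only.

import torch


class ExpertsSketch:
    # Hypothetical stand-in for an experts implementation such as the
    # CutlassExpertsFp8 object constructed in the diff.
    def apply(self, hidden_states: torch.Tensor) -> torch.Tensor:
        return hidden_states


class ModularKernelSketch:
    # Hypothetical stand-in for a modular MoE kernel: it stores the
    # experts object and the parallel_config handed in by the caller,
    # mirroring the `parallel_config=parallel_config` kwarg in the diff.
    def __init__(self, experts: ExpertsSketch, parallel_config=None):
        self.experts = experts
        self.parallel_config = parallel_config

    def __call__(self, hidden_states: torch.Tensor) -> torch.Tensor:
        # A real kernel would consult parallel_config to decide how tokens
        # are routed across expert-parallel ranks before and after compute.
        return self.experts.apply(hidden_states)


# Usage mirroring the call site in the diff: build the kernel once with
# the experts object and the parallel config, then invoke it.
fn = ModularKernelSketch(ExpertsSketch(), parallel_config=None)
out = fn(torch.randn(4, 8))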