Add routed_scaling_factor to MoE grouped topk (#23123)

Signed-off-by: Xin Yang <xyangx@amazon.com> Co-authored-by: Michael Goin <mgoin64@gmail.com> Co-authored-by: Cyrus Leung <tlleungac@connect.ust.hk>
2025-08-29 21:36:48 -07:00
parent 5b31cb1781
commit 8fb85b7bb6
19 changed files with 77 additions and 4 deletions
--- a/vllm/model_executor/layers/quantization/mxfp4.py
+++ b/vllm/model_executor/layers/quantization/mxfp4.py
@@ -546,6 +546,7 @@ class Mxfp4MoEMethod(FusedMoEMethodBase):
        expert_map: Optional[torch.Tensor] = None,
        custom_routing_function: Optional[Callable] = None,
        scoring_func: str = "softmax",
+        routed_scaling_factor: float = 1.0,
        e_score_correction_bias: Optional[torch.Tensor] = None,
        apply_router_weight_on_input: bool = False,
        activation: str = "silu",
@@ -569,6 +570,7 @@ class Mxfp4MoEMethod(FusedMoEMethodBase):
                num_expert_group=num_expert_group,
                custom_routing_function=custom_routing_function,
                scoring_func=scoring_func,
+                routed_scaling_factor=routed_scaling_factor,
                e_score_correction_bias=e_score_correction_bias)

            return torch.ops.vllm.fused_marlin_moe(