[MoE][Refactor] Make select_experts a non-static method (#29067)

Signed-off-by: Bill Nell <bnell@redhat.com>
2025-11-24 13:38:04 -05:00
parent cec418b5df
commit 8f066146c3
18 changed files with 163 additions and 472 deletions
--- a/vllm/model_executor/layers/quantization/gptq_marlin.py
+++ b/vllm/model_executor/layers/quantization/gptq_marlin.py
@@ -722,7 +722,7 @@ class GPTQMarlinMoEMethod(FusedMoEMethodBase):

    def apply(
        self,
-        layer: torch.nn.Module,
+        layer: FusedMoE,
        x: torch.Tensor,
        router_logits: torch.Tensor,
        top_k: int,
@@ -743,26 +743,11 @@ class GPTQMarlinMoEMethod(FusedMoEMethodBase):
        logical_to_physical_map: torch.Tensor | None = None,
        logical_replica_count: torch.Tensor | None = None,
    ) -> torch.Tensor | tuple[torch.Tensor, torch.Tensor]:
-        if enable_eplb:
-            raise NotImplementedError(
-                "EPLB not supported for `GPTQMarlinMoEMethod` yet."
-            )
-
        assert activation == "silu", "Only SiLU activation is supported."

-        topk_weights, topk_ids, _ = FusedMoE.select_experts(
+        topk_weights, topk_ids, _ = layer.select_experts(
            hidden_states=x,
            router_logits=router_logits,
-            use_grouped_topk=use_grouped_topk,
-            top_k=top_k,
-            renormalize=renormalize,
-            topk_group=topk_group,
-            num_expert_group=num_expert_group,
-            custom_routing_function=custom_routing_function,
-            scoring_func=scoring_func,
-            routed_scaling_factor=routed_scaling_factor,
-            e_score_correction_bias=e_score_correction_bias,
-            indices_type=self.topk_indices_dtype,
        )

        return fused_marlin_moe(