[MoE][Refactor] Make select_experts a non-static method (#29067)

Signed-off-by: Bill Nell <bnell@redhat.com>
2025-11-24 13:38:04 -05:00
parent cec418b5df
commit 8f066146c3
18 changed files with 163 additions and 472 deletions
--- a/tests/kernels/moe/test_flashinfer.py
+++ b/tests/kernels/moe/test_flashinfer.py
@@ -11,7 +11,6 @@ from vllm.model_executor.layers.fused_moe.config import (
    fp8_w8a8_moe_quant_config,
 )
 from vllm.model_executor.layers.fused_moe.fused_moe import fused_experts
-from vllm.model_executor.layers.fused_moe.layer import FusedMoE
 from vllm.model_executor.layers.quantization.utils.flashinfer_utils import (
    apply_flashinfer_per_tensor_scale_fp8,
    flashinfer_cutlass_moe_fp8,
@@ -151,14 +150,11 @@ def test_flashinfer_per_tensor_moe_fp8_no_graph(
        td = TestData.make_moe_tensors_8bit(m, k, n, e, reorder=True)

        score = torch.randn((m, e), device="cuda", dtype=torch.bfloat16)
-        topk_weights, topk_ids, _ = FusedMoE.select_experts(
+        topk_weights, topk_ids = Llama4MoE.custom_routing_function(
            hidden_states=td.hidden_states,
-            router_logits=score,
-            use_grouped_topk=False,
-            top_k=topk,
+            gating_output=score,
+            topk=topk,
            renormalize=False,
-            custom_routing_function=Llama4MoE.custom_routing_function,
-            scoring_func="softmax",
        )

        quant_config = fp8_w8a8_moe_quant_config(
@@ -219,14 +215,11 @@ def test_flashinfer_cutlass_moe_fp8_no_graph(
        )

        score = torch.randn((m, e), device="cuda", dtype=torch.bfloat16)
-        topk_weights, topk_ids, _ = FusedMoE.select_experts(
+        topk_weights, topk_ids = Llama4MoE.custom_routing_function(
            hidden_states=td.hidden_states,
-            router_logits=score,
-            use_grouped_topk=False,
-            top_k=topk,
+            gating_output=score,
+            topk=topk,
            renormalize=False,
-            custom_routing_function=Llama4MoE.custom_routing_function,
-            scoring_func="softmax",
        )

        quant_config = fp8_w8a8_moe_quant_config(