[MoE Refactor] Move select_experts from FusedMoEQuantMethod -> FusedMoE (#31996)

Signed-off-by: Bill Nell <bnell@redhat.com>
2026-01-22 18:21:35 -05:00
parent fc56f4a071
commit dc917cceb8
22 changed files with 498 additions and 533 deletions
--- a/tests/kernels/moe/test_moe.py
+++ b/tests/kernels/moe/test_moe.py
@@ -957,18 +957,18 @@ class MarlinMoEWeightData:
 )
 @pytest.mark.skipif(current_platform.is_rocm(), reason="Skip for rocm")
 def test_fused_marlin_moe(
-    a_type,
-    b_type,
-    c_type,
-    group_blocks,
-    m,
-    n,
-    k,
-    e,
-    topk,
-    ep_size,
-    act_order,
-    is_k_full,
+    a_type: ScalarType,
+    b_type: ScalarType,
+    c_type: ScalarType,
+    group_blocks: int,
+    m: int,
+    n: int,
+    k: int,
+    e: int,
+    topk: int,
+    ep_size: int,
+    act_order: bool,
+    is_k_full: bool,
 ):
    torch.cuda.manual_seed(1)
    group_size = group_blocks if group_blocks <= 0 else group_blocks * 16
@@ -1044,7 +1044,6 @@ def test_fused_marlin_moe(
        None,
        w1_data.scales,
        w2_data.scales,
-        score,
        topk_weights,
        topk_ids,
        global_num_experts=e,
@@ -1120,7 +1119,6 @@ def test_fused_marlin_moe_with_bias(m):
        w2_data.marlin_bias,
        w1_data.scales,
        w2_data.scales,
-        score,
        topk_weights,
        topk_ids,
        global_num_experts=e,
@@ -1199,7 +1197,6 @@ def test_fused_marlin_moe_non_gated(m: int, n: int, k: int, e: int, topk: int):
        None,  # bias2
        w1_data.scales,
        w2_data.scales,
-        score,
        topk_weights,
        topk_ids,
        global_num_experts=e,
@@ -1519,7 +1516,6 @@ def test_batched_fused_marlin_moe(
        "bias2": None,
        "w1_scale": w1_data.scales,
        "w2_scale": w2_data.scales,
-        "gating_output": score,
        "global_num_experts": e,
        "expert_map": None,
        "global_scale1": w1_data.global_scale,