[MoE Refactor] Move select_experts from FusedMoEQuantMethod -> FusedMoE (#31996)

Signed-off-by: Bill Nell <bnell@redhat.com>
This commit is contained in:
bnellnm
2026-01-22 18:21:35 -05:00
committed by GitHub
parent fc56f4a071
commit dc917cceb8
22 changed files with 498 additions and 533 deletions

View File

@@ -957,18 +957,18 @@ class MarlinMoEWeightData:
)
@pytest.mark.skipif(current_platform.is_rocm(), reason="Skip for rocm")
def test_fused_marlin_moe(
a_type,
b_type,
c_type,
group_blocks,
m,
n,
k,
e,
topk,
ep_size,
act_order,
is_k_full,
a_type: ScalarType,
b_type: ScalarType,
c_type: ScalarType,
group_blocks: int,
m: int,
n: int,
k: int,
e: int,
topk: int,
ep_size: int,
act_order: bool,
is_k_full: bool,
):
torch.cuda.manual_seed(1)
group_size = group_blocks if group_blocks <= 0 else group_blocks * 16
@@ -1044,7 +1044,6 @@ def test_fused_marlin_moe(
None,
w1_data.scales,
w2_data.scales,
score,
topk_weights,
topk_ids,
global_num_experts=e,
@@ -1120,7 +1119,6 @@ def test_fused_marlin_moe_with_bias(m):
w2_data.marlin_bias,
w1_data.scales,
w2_data.scales,
score,
topk_weights,
topk_ids,
global_num_experts=e,
@@ -1199,7 +1197,6 @@ def test_fused_marlin_moe_non_gated(m: int, n: int, k: int, e: int, topk: int):
None, # bias2
w1_data.scales,
w2_data.scales,
score,
topk_weights,
topk_ids,
global_num_experts=e,
@@ -1519,7 +1516,6 @@ def test_batched_fused_marlin_moe(
"bias2": None,
"w1_scale": w1_data.scales,
"w2_scale": w2_data.scales,
"gating_output": score,
"global_num_experts": e,
"expert_map": None,
"global_scale1": w1_data.global_scale,