[MoE Refactor] Move select_experts from FusedMoEQuantMethod -> FusedMoE (#31996)
Signed-off-by: Bill Nell <bnell@redhat.com>
This commit is contained in:
@@ -957,18 +957,18 @@ class MarlinMoEWeightData:
|
||||
)
|
||||
@pytest.mark.skipif(current_platform.is_rocm(), reason="Skip for rocm")
|
||||
def test_fused_marlin_moe(
|
||||
a_type,
|
||||
b_type,
|
||||
c_type,
|
||||
group_blocks,
|
||||
m,
|
||||
n,
|
||||
k,
|
||||
e,
|
||||
topk,
|
||||
ep_size,
|
||||
act_order,
|
||||
is_k_full,
|
||||
a_type: ScalarType,
|
||||
b_type: ScalarType,
|
||||
c_type: ScalarType,
|
||||
group_blocks: int,
|
||||
m: int,
|
||||
n: int,
|
||||
k: int,
|
||||
e: int,
|
||||
topk: int,
|
||||
ep_size: int,
|
||||
act_order: bool,
|
||||
is_k_full: bool,
|
||||
):
|
||||
torch.cuda.manual_seed(1)
|
||||
group_size = group_blocks if group_blocks <= 0 else group_blocks * 16
|
||||
@@ -1044,7 +1044,6 @@ def test_fused_marlin_moe(
|
||||
None,
|
||||
w1_data.scales,
|
||||
w2_data.scales,
|
||||
score,
|
||||
topk_weights,
|
||||
topk_ids,
|
||||
global_num_experts=e,
|
||||
@@ -1120,7 +1119,6 @@ def test_fused_marlin_moe_with_bias(m):
|
||||
w2_data.marlin_bias,
|
||||
w1_data.scales,
|
||||
w2_data.scales,
|
||||
score,
|
||||
topk_weights,
|
||||
topk_ids,
|
||||
global_num_experts=e,
|
||||
@@ -1199,7 +1197,6 @@ def test_fused_marlin_moe_non_gated(m: int, n: int, k: int, e: int, topk: int):
|
||||
None, # bias2
|
||||
w1_data.scales,
|
||||
w2_data.scales,
|
||||
score,
|
||||
topk_weights,
|
||||
topk_ids,
|
||||
global_num_experts=e,
|
||||
@@ -1519,7 +1516,6 @@ def test_batched_fused_marlin_moe(
|
||||
"bias2": None,
|
||||
"w1_scale": w1_data.scales,
|
||||
"w2_scale": w2_data.scales,
|
||||
"gating_output": score,
|
||||
"global_num_experts": e,
|
||||
"expert_map": None,
|
||||
"global_scale1": w1_data.global_scale,
|
||||
|
||||
Reference in New Issue
Block a user