[Quantization][MoE] remove unused ep logic from moe marlin (#31571)
Signed-off-by: Jinzhen Lin <jinzhen.ljz@antgroup.com>
Co-authored-by: Michael Goin <mgoin64@gmail.com>
This commit is contained in:
@@ -135,7 +135,6 @@ def _fused_marlin_moe(
 moe_block_size=block_size_m,
 top_k=num_topk,
 mul_topk_weights=apply_router_weight_on_input,
-is_ep=expert_map is not None,
 b_q_type=quant_type,
 size_m=M,
 size_n=2 * N,
@@ -187,7 +186,6 @@ def _fused_marlin_moe(
 moe_block_size=block_size_m,
 top_k=1,
 mul_topk_weights=not apply_router_weight_on_input,
-is_ep=expert_map is not None,
 b_q_type=quant_type,
 size_m=M * num_topk,
 size_n=K,
Reference in New Issue
Block a user