[Quantization][MoE] remove unused ep logic from moe marlin (#31571)

Signed-off-by: Jinzhen Lin <jinzhen.ljz@antgroup.com>
Co-authored-by: Michael Goin <mgoin64@gmail.com>
This commit is contained in:
Jinzhen Lin
2026-01-07 01:07:19 +08:00
committed by GitHub
parent 28c94770ad
commit 2f4bdee61e
6 changed files with 31 additions and 60 deletions

View File

@@ -135,7 +135,6 @@ def _fused_marlin_moe(
moe_block_size=block_size_m,
top_k=num_topk,
mul_topk_weights=apply_router_weight_on_input,
-is_ep=expert_map is not None,
b_q_type=quant_type,
size_m=M,
size_n=2 * N,
@@ -187,7 +186,6 @@ def _fused_marlin_moe(
moe_block_size=block_size_m,
top_k=1,
mul_topk_weights=not apply_router_weight_on_input,
-is_ep=expert_map is not None,
b_q_type=quant_type,
size_m=M * num_topk,
size_n=K,