[Quantization][MoE] remove unused ep logic from moe marlin (#31571)

Signed-off-by: Jinzhen Lin <jinzhen.ljz@antgroup.com>
Co-authored-by: Michael Goin <mgoin64@gmail.com>
2026-01-07 01:07:19 +08:00
parent 28c94770ad
commit 2f4bdee61e
6 changed files with 31 additions and 60 deletions
--- a/vllm/model_executor/layers/fused_moe/fused_marlin_moe.py
+++ b/vllm/model_executor/layers/fused_moe/fused_marlin_moe.py
@@ -135,7 +135,6 @@ def _fused_marlin_moe(
        moe_block_size=block_size_m,
        top_k=num_topk,
        mul_topk_weights=apply_router_weight_on_input,
-        is_ep=expert_map is not None,
        b_q_type=quant_type,
        size_m=M,
        size_n=2 * N,
@@ -187,7 +186,6 @@ def _fused_marlin_moe(
        moe_block_size=block_size_m,
        top_k=1,
        mul_topk_weights=not apply_router_weight_on_input,
-        is_ep=expert_map is not None,
        b_q_type=quant_type,
        size_m=M * num_topk,
        size_n=K,