permute/unpermute kernel for moe optimization (#14568)

Signed-off-by: Caleb_Du <Caleb_Du@zju.edu.cn>
This commit is contained in:
Caleb_Du
2025-05-03 02:31:55 +08:00
committed by GitHub
parent 0f87d8f7b2
commit 3e887d2e0c
19 changed files with 1474 additions and 28 deletions

View File

@@ -115,8 +115,8 @@ def benchmark_config(config: BenchmarkConfig,
from vllm.model_executor.layers.fused_moe import override_config
with override_config(config):
if use_deep_gemm:
topk_weights, topk_ids = fused_topk(x, input_gating, topk,
False)
topk_weights, topk_ids, token_expert_indices = fused_topk(
x, input_gating, topk, False)
return fused_experts(
x,
w1,