diff --git a/benchmarks/kernels/benchmark_moe.py b/benchmarks/kernels/benchmark_moe.py index c35cdb121..c5e3dabe5 100644 --- a/benchmarks/kernels/benchmark_moe.py +++ b/benchmarks/kernels/benchmark_moe.py @@ -226,9 +226,10 @@ def benchmark_config( x, input_gating, topk, renormalize=not use_deep_gemm ) + inplace = not disable_inplace() if use_deep_gemm: return deep_gemm_experts( - x, w1, w2, topk_weights, topk_ids, inplace=True + x, w1, w2, topk_weights, topk_ids, inplace=inplace ) return fused_experts( x, @@ -236,7 +237,7 @@ def benchmark_config( w2, topk_weights, topk_ids, - inplace=True, + inplace=inplace, quant_config=quant_config, )