[Misc] Fused MoE Marlin support for GPTQ (#8217)

2024-09-09 23:02:52 -04:00
parent c7cb5c3335
commit 6cd5e5b07e
19 changed files with 912 additions and 204 deletions
--- a/csrc/moe/marlin_moe_ops.cu
+++ b/csrc/moe/marlin_moe_ops.cu
@@ -1737,4 +1737,4 @@ torch::Tensor marlin_gemm_moe(
      moe_block_size, dev, at::cuda::getCurrentCUDAStream(dev), thread_k,
      thread_n, sms, max_par, replicate_input, apply_weights);
  return c;
-}
+}