[Misc] Fused MoE Marlin support for GPTQ (#8217)
This commit is contained in:
@@ -1737,4 +1737,4 @@ torch::Tensor marlin_gemm_moe(
|
||||
moe_block_size, dev, at::cuda::getCurrentCUDAStream(dev), thread_k,
|
||||
thread_n, sms, max_par, replicate_input, apply_weights);
|
||||
return c;
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user