[Kernel] Add fused grouped_topk kernel for MoE (#23274)
Signed-off-by: Xin Yang <xyangx@amazon.com>
Co-authored-by: Wentao Ye <44945378+yewentao256@users.noreply.github.com>
@@ -22,6 +22,11 @@ torch::Tensor moe_wna16_gemm(torch::Tensor input, torch::Tensor output,
                              torch::Tensor num_tokens_post_pad, int64_t top_k,
                              int64_t BLOCK_SIZE_M, int64_t BLOCK_SIZE_N,
                              int64_t BLOCK_SIZE_K, int64_t bit);
+
+std::tuple<torch::Tensor, torch::Tensor> grouped_topk(
+    torch::Tensor const& scores, torch::Tensor const& scores_with_bias,
+    int64_t n_group, int64_t topk_group, int64_t topk, bool renormalize,
+    double routed_scaling_factor);
 #endif
 
 bool moe_permute_unpermute_supported();
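For context, a minimal sketch of the grouped top-k routing that the fused kernel is expected to compute, written against the libtorch C++ API rather than as a CUDA kernel. The tensor shapes ([num_tokens, num_experts]), the return order (weights, ids), and the choice to draw the final weights from `scores` while selecting experts on `scores_with_bias` are assumptions inferred from the declared signature, not taken from this diff.

```cpp
#include <torch/torch.h>
#include <limits>
#include <tuple>

// Reference sketch (assumed semantics), not the fused CUDA kernel:
// scores, scores_with_bias: [num_tokens, num_experts]
std::tuple<torch::Tensor, torch::Tensor> grouped_topk_reference(
    torch::Tensor const& scores, torch::Tensor const& scores_with_bias,
    int64_t n_group, int64_t topk_group, int64_t topk, bool renormalize,
    double routed_scaling_factor) {
  int64_t num_tokens = scores.size(0);
  int64_t num_experts = scores.size(1);
  int64_t experts_per_group = num_experts / n_group;

  // 1) Rank expert groups by their strongest biased score.
  auto group_scores = std::get<0>(
      scores_with_bias.view({num_tokens, n_group, experts_per_group})
          .max(/*dim=*/-1));  // [num_tokens, n_group]
  auto group_idx = std::get<1>(group_scores.topk(topk_group, /*dim=*/-1));

  // 2) Mask out experts whose group was not selected.
  auto group_mask = torch::zeros_like(group_scores);
  group_mask.scatter_(-1, group_idx, 1.0);
  auto expert_mask = group_mask.unsqueeze(-1)
                         .expand({num_tokens, n_group, experts_per_group})
                         .reshape({num_tokens, num_experts});
  auto masked = scores_with_bias.masked_fill(
      expert_mask == 0, -std::numeric_limits<float>::infinity());

  // 3) Pick top-k experts among the surviving groups; weights come from the
  //    unbiased scores, optionally renormalized, then scaled.
  auto topk_ids = std::get<1>(masked.topk(topk, /*dim=*/-1));
  auto topk_weights = scores.gather(-1, topk_ids);
  if (renormalize) {
    topk_weights = topk_weights / topk_weights.sum(-1, /*keepdim=*/true);
  }
  topk_weights = topk_weights * routed_scaling_factor;
  return {topk_weights, topk_ids};
}
```

A fused kernel would collapse these reshape/mask/gather passes into a single launch per token block; the sketch is only meant to pin down the intended outputs for the declared parameters.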