[BugFix] [Kernel] Add Cutlass2x fallback kernels (#5744)

Co-authored-by: Varun Sundar Rabindranath <varun@neuralmagic.com>
This commit is contained in:
Varun Sundar Rabindranath
2024-06-24 02:37:11 +05:30
committed by GitHub
parent 832ea88fcb
commit 6c916ac8a8
2 changed files with 54 additions and 6 deletions

View File

@@ -17,3 +17,11 @@ inline uint32_t next_pow_2(uint32_t const num) {
return 1 << (CHAR_BIT * sizeof(num) - __builtin_clz(num - 1));
}
inline int get_cuda_max_shared_memory_per_block_opt_in(int const device) {
int max_shared_mem_per_block_opt_in = 0;
cudaDeviceGetAttribute(&max_shared_mem_per_block_opt_in,
cudaDevAttrMaxSharedMemoryPerBlockOptin,
device);
return max_shared_mem_per_block_opt_in;
}