[BugFix] [Kernel] Add Cutlass2x fallback kernels (#5744)
Co-authored-by: Varun Sundar Rabindranath <varun@neuralmagic.com>
This commit is contained in:
committed by
GitHub
parent
832ea88fcb
commit
6c916ac8a8
@@ -17,3 +17,11 @@ inline uint32_t next_pow_2(uint32_t const num) {
|
||||
return 1 << (CHAR_BIT * sizeof(num) - __builtin_clz(num - 1));
|
||||
}
|
||||
|
||||
inline int get_cuda_max_shared_memory_per_block_opt_in(int const device) {
|
||||
int max_shared_mem_per_block_opt_in = 0;
|
||||
cudaDeviceGetAttribute(&max_shared_mem_per_block_opt_in,
|
||||
cudaDevAttrMaxSharedMemoryPerBlockOptin,
|
||||
device);
|
||||
return max_shared_mem_per_block_opt_in;
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user