test: 64 threads, 2 stores to col 0
This commit is contained in:
@@ -74,7 +74,7 @@ int main() {
|
||||
float* d_out; cudaMalloc(&d_out, 2 * sizeof(float));
|
||||
cudaMemset(d_out, 0, 2 * sizeof(float));
|
||||
|
||||
-test_tmem_2col<<<1, 32, 1024>>>(d_out);
+test_tmem_2col<<<1, 64, 1024>>>(d_out);
|
||||
cudaError_t err = cudaDeviceSynchronize();
|
||||
if (err != cudaSuccess) { printf("CUDA ERROR: %s\n", cudaGetErrorString(err)); return 1; }
|
||||
|
||||
|
||||
Reference in New Issue
Block a user