test: 64 threads, 2 stores to col 0

This commit is contained in:
2026-05-28 09:57:53 +00:00
parent 987f2c8917
commit 24c5afe1dc

View File

@@ -74,7 +74,7 @@ int main() {
float* d_out; cudaMalloc(&d_out, 2 * sizeof(float));
cudaMemset(d_out, 0, 2 * sizeof(float));
-test_tmem_2col<<<1, 32, 1024>>>(d_out);
+test_tmem_2col<<<1, 64, 1024>>>(d_out);
cudaError_t err = cudaDeviceSynchronize();
if (err != cudaSuccess) { printf("CUDA ERROR: %s\n", cudaGetErrorString(err)); return 1; }