test: add fence+sync between 2 tmem_stores
This commit is contained in:
@@ -45,8 +45,13 @@ __global__ void test_tmem_loop(float* out) {
|
||||
memcpy(&u2, &v2, 4); memcpy(&u3, &v3, 4);
|
||||
|
||||
tmem_store(tb + 0, u0, u1, u2, u3);
|
||||
}
|
||||
asm volatile("tcgen05.wait::st.sync.aligned;" ::: "memory");
|
||||
__syncthreads();
|
||||
if (threadIdx.x < 32) {
|
||||
tmem_store(tb + 1, u0, u1, u2, u3);
|
||||
}
|
||||
}
|
||||
asm volatile("tcgen05.wait::st.sync.aligned;" ::: "memory");
|
||||
__syncthreads();
|
||||
|
||||
|
||||
Reference in New Issue
Block a user