diff --git a/tests/unit/test_tmem_row_offset.cu b/tests/unit/test_tmem_row_offset.cu index 6cd4d136..50f5c99c 100644 --- a/tests/unit/test_tmem_row_offset.cu +++ b/tests/unit/test_tmem_row_offset.cu @@ -38,8 +38,7 @@ __global__ void test_tmem_row_offset(float* results) { uint32_t* sTmemBase = (uint32_t*)sbuf; // Alloc TMEM - uint32_t smem_ptr; - asm volatile("cvta.to.shared.u32 %0, %1;" : "=r"(smem_ptr) : "r"((uint32_t)__cvta_generic_to_shared(sTmemBase))); + uint32_t smem_ptr = __cvta_generic_to_shared(sTmemBase); if (lane == 0) tmem_alloc(smem_ptr, TMEM_N); __syncwarp(); uint32_t tb = *sTmemBase;