fix: use __cvta_generic_to_shared directly for 64-bit compat
This commit is contained in:
@@ -38,8 +38,7 @@ __global__ void test_tmem_row_offset(float* results) {
|
||||
uint32_t* sTmemBase = (uint32_t*)sbuf;
|
||||
|
||||
// Alloc TMEM
|
||||
- uint32_t smem_ptr;
|
||||
- asm volatile("cvta.to.shared.u32 %0, %1;" : "=r"(smem_ptr) : "r"((uint32_t)__cvta_generic_to_shared(sTmemBase)));
|
||||
+ uint32_t smem_ptr = __cvta_generic_to_shared(sTmemBase);
|
||||
if (lane == 0) tmem_alloc(smem_ptr, TMEM_N);
|
||||
__syncwarp();
|
||||
uint32_t tb = *sTmemBase;
|
||||
|
||||
Reference in New Issue
Block a user