From 28e04a5ea89e8bdb2c9c7386cf354468f4a7f803 Mon Sep 17 00:00:00 2001 From: biondizzle Date: Thu, 28 May 2026 22:56:29 +0000 Subject: [PATCH] fix: use __cvta_generic_to_shared directly for 64-bit compat --- tests/unit/test_tmem_row_offset.cu | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/unit/test_tmem_row_offset.cu b/tests/unit/test_tmem_row_offset.cu index 6cd4d136..50f5c99c 100644 --- a/tests/unit/test_tmem_row_offset.cu +++ b/tests/unit/test_tmem_row_offset.cu @@ -38,8 +38,7 @@ __global__ void test_tmem_row_offset(float* results) { uint32_t* sTmemBase = (uint32_t*)sbuf; // Alloc TMEM - uint32_t smem_ptr; - asm volatile("cvta.to.shared.u32 %0, %1;" : "=r"(smem_ptr) : "r"((uint32_t)__cvta_generic_to_shared(sTmemBase))); + uint32_t smem_ptr = __cvta_generic_to_shared(sTmemBase); if (lane == 0) tmem_alloc(smem_ptr, TMEM_N); __syncwarp(); uint32_t tb = *sTmemBase;