diff --git a/tests/unit/test_tma_proper.cu b/tests/unit/test_tma_proper.cu index 36b11931..1fc0882a 100644 --- a/tests/unit/test_tma_proper.cu +++ b/tests/unit/test_tma_proper.cu @@ -48,7 +48,7 @@ test_tma_load_kernel(CUtensorMap* tma_desc, bf16_t* gmem_dst) { "cp.async.bulk.tensor.2d.shared::cluster.global.mbarrier::complete_tx::bytes " "[%0], [%1, {%3, %4}], [%2];" :: "r"(smem_addr), - "l"((uint64_t)*tma_desc), + "l"((uint64_t)tma_desc), "r"(mbar_addr), "r"(0), // coord_x (column) "r"(0) // coord_y (row)