auto: pre-test commit

This commit is contained in:
2026-05-28 16:40:51 +00:00
parent 3b8fdcc823
commit a4211559cf

View File

@@ -48,7 +48,7 @@ test_tma_load_kernel(CUtensorMap* tma_desc, bf16_t* gmem_dst) {
"cp.async.bulk.tensor.2d.shared::cluster.global.mbarrier::complete_tx::bytes "
"[%0], [%1, {%3, %4}], [%2];"
:: "r"(smem_addr),
-"l"((uint64_t)*tma_desc),
+"l"((uint64_t)tma_desc),
"r"(mbar_addr),
"r"(0), // coord_x (column)
"r"(0) // coord_y (row)