auto: pre-test commit
This commit is contained in:
@@ -48,7 +48,7 @@ test_tma_load_kernel(CUtensorMap* tma_desc, bf16_t* gmem_dst) {
|
||||
"cp.async.bulk.tensor.2d.shared::cluster.global.mbarrier::complete_tx::bytes "
|
||||
"[%0], [%1, {%3, %4}], [%2];"
|
||||
:: "r"(smem_addr),
|
||||
"l"((uint64_t)*tma_desc),
|
||||
"l"((uint64_t)tma_desc),
|
||||
"r"(mbar_addr),
|
||||
"r"(0), // coord_x (column)
|
||||
"r"(0) // coord_y (row)
|
||||
|
||||
Reference in New Issue
Block a user