debug: add TMA descriptor error reporting

This commit is contained in:
2026-05-29 04:38:57 +00:00
parent c7a6d7d231
commit b78ebe8a9c
2 changed files with 9 additions and 2 deletions

View File

@@ -145,6 +145,11 @@ inline bool create_tma_desc_2d_bf16(
CU_TENSOR_MAP_L2_PROMOTION_NONE,
CU_TENSOR_MAP_FLOAT_OOB_FILL_NONE
);
if (res != CUDA_SUCCESS) {
fprintf(stderr, "cuTensorMapEncodeTiled failed: error=%d, gdim=[%lu,%lu], gstr=[%lu,%lu], tdim=[%u,%u], tstr=[%u,%u]\n",
(int)res, global_dim[0], global_dim[1], global_str[0], global_str[1],
tile_dim[0], tile_dim[1], tile_str[0], tile_str[1]);
}
return res == CUDA_SUCCESS;
}

View File

@@ -159,8 +159,10 @@ struct TmaDescSet {
// The data in GMEM starts at d_q, shape (T, HD), stride (HD, 1).
// We treat it as (128, HD) — rows beyond T are garbage, kernel ignores them.
uint32_t q_tile_rows = 128;
if (!create_tma_desc_2d_bf16(&tma_q, d_q, 128, (uint64_t)hd, q_tile_rows, (uint32_t)hd)) {
printf(" Failed to create Q TMA desc\n"); return false;
CUresult q_res = create_tma_desc_2d_bf16(&tma_q, d_q, 128, (uint64_t)hd, q_tile_rows, (uint32_t)hd);
if (q_res != true) {
printf(" Failed to create Q TMA desc: rows=128, cols=%d, tile_rows=128, tile_cols=%d\n", hd, hd);
return false;
}
// K: (s_k, HD) — TMA tile = (16, s_k) to load one K sub-tile at a time