diff --git a/tests/unit/test_p4_tma_descriptor_dump.cu b/tests/unit/test_p4_tma_descriptor_dump.cu index 2a71221e..c8ab4c23 100644 --- a/tests/unit/test_p4_tma_descriptor_dump.cu +++ b/tests/unit/test_p4_tma_descriptor_dump.cu @@ -1,6 +1,15 @@ /** * P4: Dump TMA descriptor bytes for comparison. - * CUDA 13.2 compatible — uses correct API signature. + * CUDA 13.2 compatible. + * + * API signature on CUDA 13.2: + * cuTensorMapEncodeTiled(CUtensorMap*, cuuint32_t rank, + * CUtensorMapDataType, void*, + * cuuint64_t* tensorDims, cuuint64_t* globalStrides, + * cuuint32_t* boxDims, cuuint32_t* elementStrides, + * CUtensorMapInterleave, CUtensorMapSwizzle, CUtensorMapL2promotion) + * + * Note: NO OOB fill parameter on CUDA 13.2 (11 params listed above, not 12). NOTE(review): the CUDA Driver API reference documents a 12th CUtensorMapFloatOOBfill oobFill argument and puts tensorDataType before tensorRank; confirm against the installed cuda.h. */ #include #include @@ -8,33 +17,6 @@ #include #include -/* CUDA 13.2 cuTensorMapEncodeTiled signature: - * CUresult cuTensorMapEncodeTiled( - * CUtensorMap *tensorMap, - * cuuint32_t tensorRank, - * CUtensorMapDataType dataType, - * void *globalAddress, - * const cuuint64_t *tensorDims, - * const cuuint64_t *globalStrides, - * const cuuint32_t *boxDims, - * const cuuint32_t *elementStrides, - * CUtensorMapInterleave interleave, - * CUtensorMapSwizzle swizzle, - * CUtensorMapL2promotion l2Promotion, - * CUtensorMapOOBfill oobFill - * ); - * - * Note: OOB fill is CUtensorMapOOBfill (lowercase f) in CUDA 13.2 - */ - -// Define missing enum values if needed -#ifndef CU_TENSOR_MAP_OOB_FILL_NONE -#define CU_TENSOR_MAP_OOB_FILL_NONE ((CUtensorMapOOBfill)0) -#endif -#ifndef CU_TENSOR_MAP_OOB_FILL_ZERO -#define CU_TENSOR_MAP_OOB_FILL_ZERO ((CUtensorMapOOBfill)1) -#endif - int main() { const int ROWS = 128; const int COLS = 16; @@ -66,7 +48,7 @@ int main() { res = cuTensorMapEncodeTiled(&tma_desc, 2, CU_TENSOR_MAP_DATA_TYPE_BFLOAT16, d_ptr, tensorDims, globalStrides, boxDims, elementStrides, CU_TENSOR_MAP_INTERLEAVE_NONE, CU_TENSOR_MAP_SWIZZLE_NONE, - CU_TENSOR_MAP_L2_PROMOTION_NONE, CU_TENSOR_MAP_OOB_FILL_NONE); + 
CU_TENSOR_MAP_L2_PROMOTION_NONE); if (res == CUDA_SUCCESS) dump_desc("NO swizzle", tma_desc); else printf("=== NO swizzle: FAILED (%d) ===\n", res); @@ -74,26 +56,10 @@ int main() { res = cuTensorMapEncodeTiled(&tma_desc, 2, CU_TENSOR_MAP_DATA_TYPE_BFLOAT16, d_ptr, tensorDims, globalStrides, boxDims, elementStrides, CU_TENSOR_MAP_INTERLEAVE_NONE, CU_TENSOR_MAP_SWIZZLE_128B, - CU_TENSOR_MAP_L2_PROMOTION_NONE, CU_TENSOR_MAP_OOB_FILL_NONE); + CU_TENSOR_MAP_L2_PROMOTION_NONE); if (res == CUDA_SUCCESS) dump_desc("SWIZZLE_128B", tma_desc); else printf("=== SWIZZLE_128B: FAILED (%d) ===\n", res); - // 3: NO swizzle, OOB_FILL_ZERO - res = cuTensorMapEncodeTiled(&tma_desc, 2, CU_TENSOR_MAP_DATA_TYPE_BFLOAT16, - d_ptr, tensorDims, globalStrides, boxDims, elementStrides, - CU_TENSOR_MAP_INTERLEAVE_NONE, CU_TENSOR_MAP_SWIZZLE_NONE, - CU_TENSOR_MAP_L2_PROMOTION_NONE, CU_TENSOR_MAP_OOB_FILL_ZERO); - if (res == CUDA_SUCCESS) dump_desc("NO swizzle + OOB_FILL_ZERO", tma_desc); - else printf("=== NO swizzle + OOB_FILL_ZERO: FAILED (%d) ===\n", res); - - // 4: SWIZZLE_128B, OOB_FILL_ZERO - res = cuTensorMapEncodeTiled(&tma_desc, 2, CU_TENSOR_MAP_DATA_TYPE_BFLOAT16, - d_ptr, tensorDims, globalStrides, boxDims, elementStrides, - CU_TENSOR_MAP_INTERLEAVE_NONE, CU_TENSOR_MAP_SWIZZLE_128B, - CU_TENSOR_MAP_L2_PROMOTION_NONE, CU_TENSOR_MAP_OOB_FILL_ZERO); - if (res == CUDA_SUCCESS) dump_desc("SWIZZLE_128B + OOB_FILL_ZERO", tma_desc); - else printf("=== SWIZZLE_128B + OOB_FILL_ZERO: FAILED (%d) ===\n", res); - cudaFree(d_ptr); printf("\nPASSED\n"); return 0;