Debug: add printf at kernel start

This commit is contained in:
2026-05-28 14:27:12 +00:00
parent 9e13096bf8
commit 738e39cb63

View File

@@ -36,6 +36,7 @@ test_fmha_smem_p(const bf16_t* __restrict__ q, const bf16_t* __restrict__ k,
float* __restrict__ o_scalar, float scale)
{
const int tid = threadIdx.x, wid = tid / 32, lane = tid % 32;
if (tid == 0) printf("Kernel started, smem=%d, wid=%d\n", threadIdx.x, wid);
extern __shared__ char sbuf[];
uint32_t* sTmemBase = (uint32_t*)sbuf;