diff --git a/tests/unit/test_fmha_hd64_smem_p.cu b/tests/unit/test_fmha_hd64_smem_p.cu index 9a9dda5b..3b58b6c4 100644 --- a/tests/unit/test_fmha_hd64_smem_p.cu +++ b/tests/unit/test_fmha_hd64_smem_p.cu @@ -32,8 +32,8 @@ __global__ void __launch_bounds__(128) test_fmha_hd64_smem_p(const bf16_t* q, const bf16_t* k, const bf16_t* v, bf16_t* o_out, float* o_scalar, float scale) { - if (tid == 0) printf("Kernel started! smem_buf=%p\n", sbuf); const int tid = threadIdx.x, wid = tid / 32, lane = tid % 32; + if (tid == 0) printf("Kernel started!\n"); extern __shared__ char sbuf[]; uint32_t* sTmemBase = (uint32_t*)sbuf;