From 465e089a2bc3e126e38c3fc6764222a8a42f2abe Mon Sep 17 00:00:00 2001
From: biondizzle
Date: Thu, 28 May 2026 14:56:07 +0000
Subject: [PATCH] Add launch error check for HD=64

---
Reviewer notes (this section is not part of the commit message):

* The hunk inserts a cudaGetLastError() check immediately after the
  test_fmha_hd64_smem_p<<<1, 128, smem>>> launch and before the existing
  cudaDeviceSynchronize() check, so a rejected launch prints
  "LAUNCH ERROR" and exits with status 1 instead of being reported only
  by the later synchronize.
* The previous revision of this patch also added a second, identical
  `if (err != cudaSuccess)` line after the existing one. `err` is not
  reassigned between the two checks and the first one already returns,
  so the duplicate was unreachable on failure and a no-op on success.
  It has been dropped; a one-line comment takes its place, which keeps
  the insertion count at 4.
* NOTE(review): leading indentation and the position of the blank
  context lines were reconstructed and must be confirmed against the
  target file before applying. The post-image blob id on the `index`
  line was not regenerated after this edit.

 tests/unit/test_fmha_hd64_smem_p.cu | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/tests/unit/test_fmha_hd64_smem_p.cu b/tests/unit/test_fmha_hd64_smem_p.cu
index 2ab0773e..07a7607c 100644
--- a/tests/unit/test_fmha_hd64_smem_p.cu
+++ b/tests/unit/test_fmha_hd64_smem_p.cu
@@ -221,8 +221,12 @@ int main() {
     printf("SMEM: %d bytes (%.1f KB, limit 232 KB)\n", smem, smem/1024.0f);
 
     test_fmha_hd64_smem_p<<<1, 128, smem>>>(d_q, d_k, d_v, d_o, d_o_scalar, SCALE);
+    // A kernel launch returns no status itself; a rejected launch configuration is reported by cudaGetLastError().
+    cudaError_t launch_err = cudaGetLastError();
+    if (launch_err != cudaSuccess) { printf("LAUNCH ERROR: %s\n", cudaGetErrorString(launch_err)); return 1; }
+
     cudaError_t err = cudaDeviceSynchronize();
     if (err != cudaSuccess) { printf("CUDA ERROR: %s\n", cudaGetErrorString(err)); return 1; }
 
     cudaMemcpy(h_o, d_o, HD*sizeof(bf16_t), cudaMemcpyDeviceToHost);
     cudaMemcpy(h_o_scalar, d_o_scalar, HD*sizeof(float), cudaMemcpyDeviceToHost);