Add launch error check for HD=64
This commit is contained in:
@@ -221,8 +221,12 @@ int main() {
|
||||
printf("SMEM: %d bytes (%.1f KB, limit 232 KB)\n", smem, smem/1024.0f);
|
||||
test_fmha_hd64_smem_p<<<1, 128, smem>>>(d_q, d_k, d_v, d_o, d_o_scalar, SCALE);
|
||||
|
||||
cudaError_t launch_err = cudaGetLastError();
|
||||
if (launch_err != cudaSuccess) { printf("LAUNCH ERROR: %s\n", cudaGetErrorString(launch_err)); return 1; }
|
||||
|
||||
cudaError_t err = cudaDeviceSynchronize();
|
||||
if (err != cudaSuccess) { printf("CUDA ERROR: %s\n", cudaGetErrorString(err)); return 1; }
|
||||
if (err != cudaSuccess) { printf("CUDA ERROR: %s\n", cudaGetErrorString(err)); return 1; }
|
||||
|
||||
cudaMemcpy(h_o, d_o, HD*sizeof(bf16_t), cudaMemcpyDeviceToHost);
|
||||
cudaMemcpy(h_o_scalar, d_o_scalar, HD*sizeof(float), cudaMemcpyDeviceToHost);
|
||||
|
||||
Reference in New Issue
Block a user