diff --git a/tests/unit/test_fmha_sm100_standalone.cu b/tests/unit/test_fmha_sm100_standalone.cu index 0487b1a6..f5b92d38 100644 --- a/tests/unit/test_fmha_sm100_standalone.cu +++ b/tests/unit/test_fmha_sm100_standalone.cu @@ -101,8 +101,10 @@ int test_kernel(const char* name, int HD_val, int sk, float scale, for (int i = 0; i < B*H*HD_val; i++) ho_gpu[i] = bf16_to_f32_cpu(hob[i]); float cos = cosine_sim(ho_gpu, ho_ref, B*H*HD_val); + float max_diff = 0; + for(int i=0;i 0.999f; - printf(" %s hd=%d s_k=%d: cos %.6f %s\n", name, HD_val, sk, cos, pass ? "✅" : "❌"); + printf(" %s hd=%d s_k=%d: cos %.6f max_diff %.6f %s\n", name, HD_val, sk, cos, max_diff, pass ? "✅" : "❌"); if (!pass) { printf(" GPU[:4] = %.6f %.6f %.6f %.6f\n", ho_gpu[0], ho_gpu[1], ho_gpu[2], ho_gpu[3]);