From a12607b0bd3ef018a457b58a7e1d9aec4d5a090a Mon Sep 17 00:00:00 2001 From: biondizzle Date: Thu, 28 May 2026 07:45:32 +0000 Subject: [PATCH] test: add NaN counter to FMHA test --- tests/unit/test_fmha_sm100_standalone.cu | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/tests/unit/test_fmha_sm100_standalone.cu b/tests/unit/test_fmha_sm100_standalone.cu index f5b92d38..02c0115c 100644 --- a/tests/unit/test_fmha_sm100_standalone.cu +++ b/tests/unit/test_fmha_sm100_standalone.cu @@ -102,9 +102,16 @@ int test_kernel(const char* name, int HD_val, int sk, float scale, float cos = cosine_sim(ho_gpu, ho_ref, B*H*HD_val); float max_diff = 0; - for(int i=0;i 0.999f; - printf(" %s hd=%d s_k=%d: cos %.6f max_diff %.6f %s\n", name, HD_val, sk, cos, max_diff, pass ? "✅" : "❌"); + int nan_count = 0; + for(int i=0;i 0.9999 is the correct threshold for BF16 output. + int pass = cos > 0.9999f; + printf(" %s hd=%d s_k=%d: cos %.6f max_diff %.6f nan=%d %s\n", name, HD_val, sk, cos, max_diff, nan_count, pass ? "✅" : "❌"); if (!pass) { printf(" GPU[:4] = %.6f %.6f %.6f %.6f\n", ho_gpu[0], ho_gpu[1], ho_gpu[2], ho_gpu[3]);