From e940786fd5aa002d9f3c715fa2fde3a3939ec28c Mon Sep 17 00:00:00 2001 From: biondizzle Date: Thu, 28 May 2026 06:32:01 +0000 Subject: [PATCH] fix: HD_val variable name in test --- tests/unit/test_fmha_sm100_standalone.cu | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/unit/test_fmha_sm100_standalone.cu b/tests/unit/test_fmha_sm100_standalone.cu index 69dfff62..5e50450e 100644 --- a/tests/unit/test_fmha_sm100_standalone.cu +++ b/tests/unit/test_fmha_sm100_standalone.cu @@ -92,13 +92,13 @@ int test_kernel(const char* name, int HD_val, int sk, float scale, } // Copy result and compare - uint16_t* hob = (uint16_t*)malloc(B*H*HD*sizeof(uint16_t)); - cudaMemcpy(hob, do_gpu, B*H*HD*sizeof(uint16_t), cudaMemcpyDeviceToHost); + uint16_t* hob = (uint16_t*)malloc(B*H*HD_val*sizeof(uint16_t)); + cudaMemcpy(hob, do_gpu, B*H*HD_val*sizeof(uint16_t), cudaMemcpyDeviceToHost); - float* ho_gpu = (float*)malloc(B*H*HD*sizeof(float)); - for (int i = 0; i < B*H*HD; i++) ho_gpu[i] = bf16_to_f32_cpu(hob[i]); + float* ho_gpu = (float*)malloc(B*H*HD_val*sizeof(float)); + for (int i = 0; i < B*H*HD_val; i++) ho_gpu[i] = bf16_to_f32_cpu(hob[i]); - float cos = cosine_sim(ho_gpu, ho_ref, B*H*HD); + float cos = cosine_sim(ho_gpu, ho_ref, B*H*HD_val); int pass = cos > 0.999f; printf(" %s hd=%d s_k=%d: cos %.6f %s\n", name, HD, sk, cos, pass ? "✅" : "❌");