diff --git a/tests/unit/test_fmha_6warp_tma_multirow_multitile.cu b/tests/unit/test_fmha_6warp_tma_multirow_multitile.cu index f4c8d4e5..f66417af 100644 --- a/tests/unit/test_fmha_6warp_tma_multirow_multitile.cu +++ b/tests/unit/test_fmha_6warp_tma_multirow_multitile.cu @@ -180,7 +180,7 @@ static int test_single(int T, int s_k, int n_h = 1, int batch = 1) { o_ref, nullptr, HD, T, s_k, SCALE); float cs = 0, na = 0, nb = 0; - int check_hd = HD_CHUNK; // Only check first hd_chunk values (for partial debug) + int check_hd = HD; // Check full HD (all chunks) for (int t = 0; t < T; t++) { for (int d = 0; d < check_hd; d++) { float a = bf16_to_f32_host(h_o[h * MAX_T * HD + t * HD + d]);