From 2b09d4f2ef6cf9408e2ef5c52d2e0f4812a8318a Mon Sep 17 00:00:00 2001 From: biondizzle Date: Thu, 28 May 2026 20:04:45 +0000 Subject: [PATCH] Fix nvcc goto-bypasses-init in multi-row test --- tests/unit/test_fmha_6warp_multirow.cu | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/unit/test_fmha_6warp_multirow.cu b/tests/unit/test_fmha_6warp_multirow.cu index 8c0ddcd0..834cf8db 100644 --- a/tests/unit/test_fmha_6warp_multirow.cu +++ b/tests/unit/test_fmha_6warp_multirow.cu @@ -142,12 +142,12 @@ static int test_single_T(int T, int n_h = 1, int batch = 1) { pass = 0; goto cleanup; } - cudaMemcpy(h_o, d_o, total_heads * T * HD * sizeof(bf16_t), cudaMemcpyDeviceToHost); - cudaMemcpy(h_lse, d_lse, total_heads * T * sizeof(float), cudaMemcpyDeviceToHost); - - // Verify each head + // Verify each head (declared before goto targets) int checked = 0, failed = 0; float min_cos = 1.0f; + + cudaMemcpy(h_o, d_o, total_heads * T * HD * sizeof(bf16_t), cudaMemcpyDeviceToHost); + cudaMemcpy(h_lse, d_lse, total_heads * T * sizeof(float), cudaMemcpyDeviceToHost); for (int b = 0; b < batch; b++) { for (int h = 0; h < n_h; h++) { int idx = b * n_h + h;