Fix nvcc "goto bypasses initialization" diagnostic in multi-row test

This commit is contained in:
2026-05-28 20:04:45 +00:00
parent d8b421ccee
commit 2b09d4f2ef

View File

@@ -142,12 +142,12 @@ static int test_single_T(int T, int n_h = 1, int batch = 1) {
pass = 0; goto cleanup;
}
cudaMemcpy(h_o, d_o, total_heads * T * HD * sizeof(bf16_t), cudaMemcpyDeviceToHost);
cudaMemcpy(h_lse, d_lse, total_heads * T * sizeof(float), cudaMemcpyDeviceToHost);
// Verify each head
// Verify each head (declared before goto targets)
int checked = 0, failed = 0;
float min_cos = 1.0f;
cudaMemcpy(h_o, d_o, total_heads * T * HD * sizeof(bf16_t), cudaMemcpyDeviceToHost);
cudaMemcpy(h_lse, d_lse, total_heads * T * sizeof(float), cudaMemcpyDeviceToHost);
for (int b = 0; b < batch; b++) {
for (int h = 0; h < n_h; h++) {
int idx = b * n_h + h;