Fix nvcc error where a goto bypasses variable initialization in the multi-row test
This commit is contained in:
@@ -142,12 +142,12 @@ static int test_single_T(int T, int n_h = 1, int batch = 1) {
|
||||
pass = 0; goto cleanup;
|
||||
}
|
||||
|
||||
cudaMemcpy(h_o, d_o, total_heads * T * HD * sizeof(bf16_t), cudaMemcpyDeviceToHost);
|
||||
cudaMemcpy(h_lse, d_lse, total_heads * T * sizeof(float), cudaMemcpyDeviceToHost);
|
||||
|
||||
// Verify each head
|
||||
// Verify each head (declared before goto targets)
|
||||
int checked = 0, failed = 0;
|
||||
float min_cos = 1.0f;
|
||||
|
||||
cudaMemcpy(h_o, d_o, total_heads * T * HD * sizeof(bf16_t), cudaMemcpyDeviceToHost);
|
||||
cudaMemcpy(h_lse, d_lse, total_heads * T * sizeof(float), cudaMemcpyDeviceToHost);
|
||||
for (int b = 0; b < batch; b++) {
|
||||
for (int h = 0; h < n_h; h++) {
|
||||
int idx = b * n_h + h;
|
||||
|
||||
Reference in New Issue
Block a user