debug: unbuffered stdout

This commit is contained in:
2026-05-30 04:46:11 +00:00
parent 8be8813d54
commit 1dca8d8cfa

View File

@@ -153,6 +153,7 @@ static int test_single(int T, int s_k, int n_h = 1, int batch = 1) {
dim3 grid(1, n_h, batch);
printf(" Launching kernel: grid=(%d,%d,%d) smem=%zu\n", grid.x, grid.y, grid.z, smem); fflush(stdout);
fmha_6warp_tma_multirow_multitile_kernel<HD><<<grid, 192, smem>>>(params);
printf(" Kernel launched.\n");
cudaError_t lerr = cudaGetLastError();
if (lerr != cudaSuccess) {
@@ -200,8 +201,8 @@ static int test_single(int T, int s_k, int n_h = 1, int batch = 1) {
}
int main() {
setbuf(stdout, NULL);
printf("START: test_fmha_6warp_tma_multirow_multitile HD=%d\n", HD);
fflush(stdout);
int total_fail = 0;
// Just the most basic test first