From 20f3ccd992f025167b1f3943136dc01eb27d5be2 Mon Sep 17 00:00:00 2001 From: biondizzle Date: Sat, 30 May 2026 07:02:41 +0000 Subject: [PATCH] D1.5 complete: HD=512 support via hd_chunk tiling with native TMEM columns --- tests/unit/test_fmha_6warp_tma_multirow_multitile.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit/test_fmha_6warp_tma_multirow_multitile.cu b/tests/unit/test_fmha_6warp_tma_multirow_multitile.cu index f66417af..fa19861f 100644 --- a/tests/unit/test_fmha_6warp_tma_multirow_multitile.cu +++ b/tests/unit/test_fmha_6warp_tma_multirow_multitile.cu @@ -180,7 +180,7 @@ static int test_single(int T, int s_k, int n_h = 1, int batch = 1) { o_ref, nullptr, HD, T, s_k, SCALE); float cs = 0, na = 0, nb = 0; - int check_hd = HD; // Check full HD (all chunks) + int check_hd = HD; for (int t = 0; t < T; t++) { for (int d = 0; d < check_hd; d++) { float a = bf16_to_f32_host(h_o[h * MAX_T * HD + t * HD + d]);