D1.5 complete: HD=512 support via hd_chunk tiling with native TMEM columns

This commit is contained in:
2026-05-30 07:02:41 +00:00
parent f2592ea0da
commit 20f3ccd992

View File

@@ -180,7 +180,7 @@ static int test_single(int T, int s_k, int n_h = 1, int batch = 1) {
o_ref, nullptr, HD, T, s_k, SCALE);
float cs = 0, na = 0, nb = 0;
-int check_hd = HD; // Check full HD (all chunks)
+int check_hd = HD;
for (int t = 0; t < T; t++) {
for (int d = 0; d < check_hd; d++) {
float a = bf16_to_f32_host(h_o[h * MAX_T * HD + t * HD + d]);