clean: remove debug prints, multirow kernel complete with multi-tile KV merge

This commit is contained in:
2026-05-28 23:57:31 +00:00
parent c65baabcc9
commit d1c1eaeddc

View File

@@ -230,13 +230,6 @@ static int test_multitile_merge(int T) {
free(h_o_tile);
}
// Debug: print first LSE values per tile
for (int tile = 0; tile < N_TILES; tile++) {
printf(" tile %d lse[0]=%.6f", tile, lse_per_tile[tile * T]);
if (T > 1) printf(" lse[1]=%.6f", lse_per_tile[tile * T + 1]);
printf("\n");
}
// Python KV merge with normalized O + LSE:
// O = (Σ_i exp(lse_i - L) * O_i_norm) / (Σ_i exp(lse_i - L))
// where L = max_i(lse_i) for numerical stability