clean: remove debug prints, multirow kernel complete with multi-tile KV merge
This commit is contained in:
@@ -230,13 +230,6 @@ static int test_multitile_merge(int T) {
|
||||
free(h_o_tile);
|
||||
}
|
||||
|
||||
// Debug: print first LSE values per tile
|
||||
for (int tile = 0; tile < N_TILES; tile++) {
|
||||
printf(" tile %d lse[0]=%.6f", tile, lse_per_tile[tile * T]);
|
||||
if (T > 1) printf(" lse[1]=%.6f", lse_per_tile[tile * T + 1]);
|
||||
printf("\n");
|
||||
}
|
||||
|
||||
// Python KV merge with normalized O + LSE:
|
||||
// O = (Σ_i exp(lse_i - L) * O_i_norm) / (Σ_i exp(lse_i - L))
|
||||
// where L = max_i(lse_i) is the shift subtracted inside exp() for numerical stability
|
||||
|
||||
Reference in New Issue
Block a user