[cpu][performance] CPU Paged Attention NEON BFMMLA BF16 Implementation (#32263)

Signed-off-by: Gassan <gassan.salama@arm.com>
This commit is contained in:
Gassan Salama
2026-02-06 07:01:48 +00:00
committed by GitHub
parent 965525667b
commit 1363e3d6d5
4 changed files with 704 additions and 4 deletions

View File

@@ -1107,7 +1107,8 @@ class AttentionMainLoop {
if (sliding_window_left != -1) {
pos = std::max(pos, curr_token_pos - sliding_window_left);
}
return pos;
// Clamp to tile end to avoid OOB when window starts past the tile
return std::min(pos, kv_tile_end_pos);
}();
int32_t right_kv_pos = [&]() {