test: force 2 K-tiles for debug

This commit is contained in:
2026-05-28 12:09:45 +00:00
parent 8b2200a6d3
commit c2e41a858e

View File

@@ -67,7 +67,7 @@ test_umma_qk_hd64_1ktile(const bf16_t* q, const bf16_t* k,
uint32_t idesc = make_idesc(128, 128);
// K-tile loop with accumulate
-for (int kt = 0; kt < hd / 16; kt++) {
+for (int kt = 0; kt < 2; kt++) { // Force 2 K-tiles for debug
// K-tile kt: columns [16*kt, 16*kt+16)
// In canonical layout, columns start at core_k = 2*kt and 2*kt+1
// Offset = 2*kt * 2048 bytes from matrix base