diff --git a/tests/unit/test_umma_qk_hd64.cu b/tests/unit/test_umma_qk_hd64.cu index e0fcd4d9..bd9cac73 100644 --- a/tests/unit/test_umma_qk_hd64.cu +++ b/tests/unit/test_umma_qk_hd64.cu @@ -70,7 +70,7 @@ test_umma_qk_hd64_1ktile(const bf16_t* q, const bf16_t* k, uint32_t idesc = make_idesc(128, 128); // K-tile loop with accumulate - for (int kt = 0; kt < 2; kt++) { // Force 2 K-tiles for debug + for (int kt = 0; kt < 1; kt++) { // 1 K-tile only // K-tile kt: columns [16*kt, 16*kt+16) // In canonical layout, columns start at core_k = 2*kt and 2*kt+1 // Offset = 2*kt * 2048 bytes from matrix base