test: 1 K-tile, volatile writes, verify SMEM
This commit is contained in:
@@ -70,7 +70,7 @@ test_umma_qk_hd64_1ktile(const bf16_t* q, const bf16_t* k,
|
||||
uint32_t idesc = make_idesc(128, 128);
|
||||
|
||||
// K-tile loop with accumulate
|
||||
- for (int kt = 0; kt < 2; kt++) { // Force 2 K-tiles for debug
|
||||
+ for (int kt = 0; kt < 1; kt++) { // 1 K-tile only
|
||||
// K-tile kt: columns [16*kt, 16*kt+16)
|
||||
// In canonical layout, columns start at core_k = 2*kt and 2*kt+1
|
||||
// Offset = 2*kt * 2048 bytes from matrix base
|
||||
|
||||
Reference in New Issue
Block a user