fix: SMEM size for MMA test — account for both sQ0 and sK0

This commit is contained in:
2026-05-28 23:06:07 +00:00
parent fee022a485
commit ff8c677486

View File

@@ -133,7 +133,7 @@ int main() {
// SMEM: sbuf(8) + sRowMax(512) = 520, rounded up to a 128-byte boundary = 640; + sQ0(4096) + sK0(4096) + slack(256) = 9088
size_t smem_off = 8 + 128*4;
smem_off = ((smem_off + 127) & ~(size_t)127);
smem_off += TILE_SZ * 2 + 256;
smem_off += TILE_SZ * 2 * 2 + 256; // sQ0 + sK0 (each TILE_SZ BF16 = 4096 bytes) + slack
int smem = (int)smem_off;
test_mma_rows<<<1, 192, smem>>>(d_r);