From d54bce6a6d3689408ba9f2b5ae686acbb2d012ca Mon Sep 17 00:00:00 2001 From: biondizzle Date: Thu, 28 May 2026 23:01:12 +0000 Subject: [PATCH] fix: correct SMEM size for MMA 4-warp test --- tests/unit/test_tmem_4warp_read.cu | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/unit/test_tmem_4warp_read.cu b/tests/unit/test_tmem_4warp_read.cu index a3f26922..e32ee1d4 100644 --- a/tests/unit/test_tmem_4warp_read.cu +++ b/tests/unit/test_tmem_4warp_read.cu @@ -153,7 +153,8 @@ int main() { float* d_r; cudaMalloc(&d_r, 32 * sizeof(float)); cudaMemset(d_r, 0, 32 * sizeof(float)); - test_mma_4warp_read<<<1, 192, 4096>>>(d_r); + int smem = 256 + 128 + 128*16*2*2 + 256; // sbuf + align + sA + sB + slack + test_mma_4warp_read<<<1, 192, smem>>>(d_r); cudaError_t err = cudaDeviceSynchronize(); if (err != cudaSuccess) {