From a04d79497942961d0c3d9639bbc987dfd223c967 Mon Sep 17 00:00:00 2001 From: biondizzle Date: Thu, 28 May 2026 08:49:37 +0000 Subject: [PATCH] =?UTF-8?q?debug:=20skip=20TMEM=20alloc=20=E2=80=94=20test?= =?UTF-8?q?=20SMEM=20loads=20only?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- dsv4/kernels/attention/fmha_qk_verify.cuh | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/dsv4/kernels/attention/fmha_qk_verify.cuh b/dsv4/kernels/attention/fmha_qk_verify.cuh index 396b6ca8..3d4628d6 100644 --- a/dsv4/kernels/attention/fmha_qk_verify.cuh +++ b/dsv4/kernels/attention/fmha_qk_verify.cuh @@ -48,6 +48,9 @@ fmha_qk_verify( } __syncthreads(); + // SKIP TMEM — just test SMEM loads and scalar QK + // No TMEM alloc, no MMA + /* // TMEM alloc for S: 128 columns if (wid == 0) { uint32_t smem_ptr = __cvta_generic_to_shared(sTmemBase); @@ -55,6 +58,8 @@ fmha_qk_verify( } __syncthreads(); uint32_t tmem_base = *sTmemBase; + */ + uint32_t tmem_base = 0; // dummy // Zero TMEM S if (wid == 0) {