From fe7d5611430bb532c602469a6814d3819d64e44c Mon Sep 17 00:00:00 2001 From: biondizzle Date: Thu, 28 May 2026 08:03:53 +0000 Subject: [PATCH] debug: print UMMA descriptor values for diagnosis --- dsv4/kernels/attention/fmha_qk_verify.cuh | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/dsv4/kernels/attention/fmha_qk_verify.cuh b/dsv4/kernels/attention/fmha_qk_verify.cuh index ab885610..989d11b0 100644 --- a/dsv4/kernels/attention/fmha_qk_verify.cuh +++ b/dsv4/kernels/attention/fmha_qk_verify.cuh @@ -78,9 +78,21 @@ fmha_qk_verify( uint32_t sQ_smem = __cvta_generic_to_shared(sQ); uint32_t sK_smem = __cvta_generic_to_shared(sK); + if (tid == 0) { + printf("[qk] sQ_smem=0x%x sK_smem=0x%x sQ_align=%d sK_align=%d\n", + sQ_smem, sK_smem, sQ_smem % 16, sK_smem % 16); + } + __syncthreads(); + uint64_t desc_q = make_umma_desc_bf16(sQ_smem, 128, HD, HD, UmmaMajor::MN); uint64_t desc_k = make_umma_desc_bf16(sK_smem, 128, HD, HD, UmmaMajor::K); + if (tid == 0) { + printf("[qk] desc_q=0x%016llx desc_k=0x%016llx\n", + (unsigned long long)desc_q, (unsigned long long)desc_k); + } + __syncthreads(); + // MMA is called by ONE lane (elect_one_sync pattern) if (wid == 0 && lane == 0) { umma_ss_f16(tmem_s, desc_q, desc_k, /*accumulate=*/false);