From 8cb32cabc995ce2f32d10fcd3f1cd09d60b3ac24 Mon Sep 17 00:00:00 2001 From: biondizzle Date: Thu, 28 May 2026 14:25:04 +0000 Subject: [PATCH] Fix asm constraint typo --- tests/unit/test_fmha_smem_p.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit/test_fmha_smem_p.cu b/tests/unit/test_fmha_smem_p.cu index 784d966c..726eebac 100644 --- a/tests/unit/test_fmha_smem_p.cu +++ b/tests/unit/test_fmha_smem_p.cu @@ -148,7 +148,7 @@ test_fmha_smem_p(const bf16_t* __restrict__ q, const bf16_t* __restrict__ k, float tmp[8]; asm volatile("tcgen05.ld.sync.aligned.32x32b.x8.b32 {%0,%1,%2,%3,%4,%5,%6,%7},[%8];" : "=f"(tmp[0]),"=f"(tmp[1]),"=f"(tmp[2]),"=f"(tmp[3]), - "=f"(tmp[4]),"=f"(tmp[5]),"=f"(tmp[6]),"f"(tmp[7]) + "=f"(tmp[4]),"=f"(tmp[5]),"=f"(tmp[6]),"=f"(tmp[7]) : "r"(tb + n*8)); asm volatile("tcgen05.wait::ld.sync.aligned;"); if (lane == 0) for (int c=0;c<8;c++) o_vals[n*8+c] = tmp[c];