From 99f13cf52e5df76a4deec5a1a077dc792f40f67d Mon Sep 17 00:00:00 2001 From: biondizzle Date: Sat, 23 May 2026 03:50:09 +0000 Subject: [PATCH] fix: BFloat16 not Float32 for bf16 reg --- dsv4/kernels/attention/fmha.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dsv4/kernels/attention/fmha.py b/dsv4/kernels/attention/fmha.py index d9b841e4..0cf1ca0f 100644 --- a/dsv4/kernels/attention/fmha.py +++ b/dsv4/kernels/attention/fmha.py @@ -249,7 +249,7 @@ class FmhaKernel: # For now: fill rP_bf16_reg from tTMEM_LOADrS (FP32→BF16 conversion) for j in cutlass.range(cute.size(rP_bf16_reg), vectorize=True): # TODO: proper element mapping from QK→PV partition - rP_bf16_reg[j] = Float32(0.0) + rP_bf16_reg[j] = BFloat16(0.0) cute.copy(rP_bf16_reg, tCrP_smem) cute.arch.fence_proxy("async.shared", space="cta")