diff --git a/dsv4/kernels/attention/fmha.py b/dsv4/kernels/attention/fmha.py index 3f1a70b6..17b7df14 100644 --- a/dsv4/kernels/attention/fmha.py +++ b/dsv4/kernels/attention/fmha.py @@ -378,10 +378,6 @@ class FmhaKernel: k2 = k_coord // 64 _sP_nostage[(m_coord, k0), 0, (k1, k2)] = rP_bf16[(j0, 0), j1, 0, 0] cute.arch.fence_proxy("async.shared", space="cta") - # O rescale register tensor (defined unconditionally for CuTeDSL scoping) - tTMrO = cute.make_rmem_tensor( - (tTMEM_LOADcO.shape, 128 // corr_tile_size), self.acc_dtype - ) if kt > 0: for i in range(n_corr_tiles): tTMrO_i_ = tTMrO[None, i]