diff --git a/dsv4/kernels/attention/fmha.py b/dsv4/kernels/attention/fmha.py index cceda0bf..44f98718 100644 --- a/dsv4/kernels/attention/fmha.py +++ b/dsv4/kernels/attention/fmha.py @@ -341,7 +341,6 @@ class FmhaKernel: # TMEM-P: store P to TMEM via register bridge cute.copy(tiled_tmem_store, rP_words, tTMEM_STOREtP) cute.arch.fence_view_async_tmem_store() - else: else: # SMEM-P: Use QK C-fragment layout for source (not TMEM layout) # rP_bf16 uses tTMEM_LOADrS.layout (TMEM layout) causing rank mismatch