From 300482e40aa6382c88fdaba2ae1280c33a3a2405 Mon Sep 17 00:00:00 2001 From: biondizzle Date: Sat, 23 May 2026 05:09:19 +0000 Subject: [PATCH] Fix tOrP0 indexing: 3-dim slice (None,None,kb) not 4-dim --- dsv4/kernels/attention/fmha.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dsv4/kernels/attention/fmha.py b/dsv4/kernels/attention/fmha.py index 87ff4ce6..67de010b 100644 --- a/dsv4/kernels/attention/fmha.py +++ b/dsv4/kernels/attention/fmha.py @@ -310,7 +310,7 @@ class FmhaKernel: if not use_smem_p: # TMEM-P: P from TMEM for kb in cutlass.range(cute.size(tOrP0, mode=[2]), unroll_full=True): - cute.gemm(pv_mma, tOtO0, tOrP0[(None, None, kb, 0)], tCrV[(None, None, kb, kvh.index)], tOtO0) + cute.gemm(pv_mma, tOtO0, tOrP0[(None, None, kb)], tCrV[(None, None, kb, kvh.index)], tOtO0) else: # SMEM-P: P from SMEM for kb in cutlass.range(cute.size(tCrP, mode=[2]), unroll_full=True):