From 60cabb186d30b47841d809a01be986d876668f5f Mon Sep 17 00:00:00 2001 From: biondizzle Date: Sun, 24 May 2026 03:37:43 +0000 Subject: [PATCH] fix: always provide valid gP tensor --- dsv4/kernels/attention/fmha.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/dsv4/kernels/attention/fmha.py b/dsv4/kernels/attention/fmha.py index 2995d6f2..354ba7be 100644 --- a/dsv4/kernels/attention/fmha.py +++ b/dsv4/kernels/attention/fmha.py @@ -91,6 +91,10 @@ class FmhaKernel: @cute.jit def __call__(self, q, k, v, c, stream, lse=None, gP=None): self.q_dtype = q.element_type; self.o_dtype = c.element_type; self.c_dtype = self.o_dtype + # If gP not provided, create a dummy tensor (for non-SMEM-P paths) + if gP is None: + _gP_dummy = torch.zeros(128, self.s_k, dtype=torch.bfloat16, device='cuda') + gP = ct.from_dlpack(_gP_dummy).mark_layout_dynamic(leading_dim=ct.get_leading_dim(_gP_dummy)) self.a_major = LayoutEnum.from_tensor(q).mma_major_mode() self.b_major = LayoutEnum.from_tensor(k).mma_major_mode() v_fmha = cute.make_tensor(