From a2627359fb26eadc85531d8f2d174cd1e3a34ff2 Mon Sep 17 00:00:00 2001 From: biondizzle Date: Sat, 30 May 2026 10:40:01 +0000 Subject: [PATCH] =?UTF-8?q?P5:=20fix=20TMA=20desc=20creation=20=E2=80=94?= =?UTF-8?q?=20write=20to=20HOST=20then=20cudaMemcpy=20to=20device?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- dsv4/kernels/attention/fmha_multitile_capi.cu | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/dsv4/kernels/attention/fmha_multitile_capi.cu b/dsv4/kernels/attention/fmha_multitile_capi.cu index 85597714..24b57010 100644 --- a/dsv4/kernels/attention/fmha_multitile_capi.cu +++ b/dsv4/kernels/attention/fmha_multitile_capi.cu @@ -48,15 +48,19 @@ int fmha_multitile_decode_launch( int idx = b * n_h + h; // K: (N, hd), TMA tile (128, 16) - if (!create_tma_desc_2d_bf16(d_tma_k + idx, k_head, N, hd, 128, 16)) { + CUtensorMap h_desc; + if (!create_tma_desc_2d_bf16(&h_desc, k_head, N, hd, 128, 16)) { cudaFree(d_tma_k); cudaFree(d_tma_v); return -1; } + cudaMemcpy(d_tma_k + idx, &h_desc, sizeof(CUtensorMap), cudaMemcpyHostToDevice); + // V: (hd, N), TMA tile (16, 16) - if (!create_tma_desc_2d_bf16(d_tma_v + idx, v_head, hd, N, 16, 16)) { + if (!create_tma_desc_2d_bf16(&h_desc, v_head, hd, N, 16, 16)) { cudaFree(d_tma_k); cudaFree(d_tma_v); return -1; } + cudaMemcpy(d_tma_v + idx, &h_desc, sizeof(CUtensorMap), cudaMemcpyHostToDevice); } }