From a272bc49b0a0a305cdcff7ee3a1b4aee58eb18e9 Mon Sep 17 00:00:00 2001
From: biondizzle <biondizzle@gmail.com>
Date: Thu, 14 May 2026 10:21:10 +0000
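Subject: [PATCH] Fix: use torch::kBFloat16 for GEMM output dtype

Replace at::kBF16 with torch::kBFloat16 when allocating the output
tensor D in cutlass_nvfp4_gemm_forward.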

---
 src/nvfp4_megamoe_kernel/cutlass_nvfp4_gemm/pytorch_binding.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/nvfp4_megamoe_kernel/cutlass_nvfp4_gemm/pytorch_binding.cpp b/src/nvfp4_megamoe_kernel/cutlass_nvfp4_gemm/pytorch_binding.cpp
index 6d8b53c7..c7549088 100644
--- a/src/nvfp4_megamoe_kernel/cutlass_nvfp4_gemm/pytorch_binding.cpp
+++ b/src/nvfp4_megamoe_kernel/cutlass_nvfp4_gemm/pytorch_binding.cpp
@@ -20,7 +20,7 @@ torch::Tensor cutlass_nvfp4_gemm_forward(
     torch::Tensor SFB,
     int64_t M, int64_t N, int64_t K
 ) {
-    auto D = torch::empty({M, N}, torch::dtype(at::kBF16).device(A_packed.device()));
+    auto D = torch::empty({M, N}, torch::dtype(torch::kBFloat16).device(A_packed.device()));
 
     auto stream = c10::cuda::getCurrentCUDAStream();
     cudaStream_t cuda_stream = stream.stream();