From 49e5646b42ff08403e49747d729824062e6cbefa Mon Sep 17 00:00:00 2001 From: biondizzle Date: Mon, 11 May 2026 22:55:28 +0000 Subject: [PATCH] =?UTF-8?q?fix:=20remove=20duplicate=20kInt8=20case=20?= =?UTF-8?q?=E2=80=94=20kPackedFP4=20is=20already=20kInt8?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit kPackedFP4 = torch::kInt8, so the kInt8 case was a duplicate. The real fix was in mega_nvfp4.hpp: changing kUInt8→kInt8 so tensors match the existing kPackedFP4 path in the TMA switch. --- csrc/jit_kernels/impls/runtime_utils.hpp | 1 - 1 file changed, 1 deletion(-) diff --git a/csrc/jit_kernels/impls/runtime_utils.hpp b/csrc/jit_kernels/impls/runtime_utils.hpp index a2fd276..72a76f0 100644 --- a/csrc/jit_kernels/impls/runtime_utils.hpp +++ b/csrc/jit_kernels/impls/runtime_utils.hpp @@ -82,7 +82,6 @@ static CUtensorMapDataType aten_dtype_to_tensor_map_dtype(const at::ScalarType& case torch::kFloat: return CU_TENSOR_MAP_DATA_TYPE_FLOAT32; case torch::kBFloat16: return CU_TENSOR_MAP_DATA_TYPE_BFLOAT16; case torch::kFloat8_e4m3fn: return CU_TENSOR_MAP_DATA_TYPE_UINT8; - case torch::kInt8: return CU_TENSOR_MAP_DATA_TYPE_UINT8; #if CUDA_VERSION >= 12080 case kPackedFP4: return fp4_unpacked_smem ? CU_TENSOR_MAP_DATA_TYPE_16U4_ALIGN16B : CU_TENSOR_MAP_DATA_TYPE_16U4_ALIGN8B;