From 49e5646b42ff08403e49747d729824062e6cbefa Mon Sep 17 00:00:00 2001
From: biondizzle <biondizzle@gmail.com>
Date: Mon, 11 May 2026 22:55:28 +0000
Subject: [PATCH] =?UTF-8?q?fix:=20remove=20duplicate=20kInt8=20case=20?=
 =?UTF-8?q?=E2=80=94=20kPackedFP4=20is=20already=20kInt8?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

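kPackedFP4 is defined as torch::kInt8, so the explicit kInt8 case
duplicated the kPackedFP4 case label in the same switch whenever
CUDA_VERSION >= 12080 (duplicate case values are ill-formed in C++).
Remove the redundant kInt8 case.

The real fix was in mega_nvfp4.hpp (a separate change, not part of
this patch): changing kUInt8→kInt8 so tensors match the existing
kPackedFP4 path in the TMA switch.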
---
 csrc/jit_kernels/impls/runtime_utils.hpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/csrc/jit_kernels/impls/runtime_utils.hpp b/csrc/jit_kernels/impls/runtime_utils.hpp
index a2fd276..72a76f0 100644
--- a/csrc/jit_kernels/impls/runtime_utils.hpp
+++ b/csrc/jit_kernels/impls/runtime_utils.hpp
@@ -82,7 +82,6 @@ static CUtensorMapDataType aten_dtype_to_tensor_map_dtype(const at::ScalarType&
         case torch::kFloat:         return CU_TENSOR_MAP_DATA_TYPE_FLOAT32;
         case torch::kBFloat16:      return CU_TENSOR_MAP_DATA_TYPE_BFLOAT16;
         case torch::kFloat8_e4m3fn: return CU_TENSOR_MAP_DATA_TYPE_UINT8;
-        case torch::kInt8:           return CU_TENSOR_MAP_DATA_TYPE_UINT8;
 #if CUDA_VERSION >= 12080
         case kPackedFP4:            return fp4_unpacked_smem ? CU_TENSOR_MAP_DATA_TYPE_16U4_ALIGN16B
                                                              : CU_TENSOR_MAP_DATA_TYPE_16U4_ALIGN8B;