From ef4c0ad4893181ec6d76a9ecc8d7d5b8fd03f94d Mon Sep 17 00:00:00 2001 From: biondizzle Date: Mon, 1 Jun 2026 07:29:30 +0000 Subject: [PATCH] Fix BF16 router mma_tiler: use cutlass.Int32 for CuTe DSL compatibility --- dsv4/kernels/router/dense_router_decode_kernel.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dsv4/kernels/router/dense_router_decode_kernel.py b/dsv4/kernels/router/dense_router_decode_kernel.py index a175d5f3..7b24baa4 100644 --- a/dsv4/kernels/router/dense_router_decode_kernel.py +++ b/dsv4/kernels/router/dense_router_decode_kernel.py @@ -67,7 +67,8 @@ class DenseRouterDecodeKernel: self._tiled_mma = self._create_tiled_mma() mma_inst_shape_k = cute.size(self._tiled_mma.shape_mnk, mode=[2]) mma_inst_tile_k = 4 - self.mma_tiler = (*self.mma_tiler_mn, mma_inst_shape_k * mma_inst_tile_k) + k_tile = mma_inst_shape_k * mma_inst_tile_k + self.mma_tiler = (cutlass.Int32(self.mma_tiler_mn[0]), cutlass.Int32(self.mma_tiler_mn[1]), cutlass.Int32(k_tile)) self.cta_tile_shape_mnk = ( self.mma_tiler[0] // cute.size(self._tiled_mma.thr_id.shape), self.mma_tiler[1], self.mma_tiler[2],