diff --git a/dsv4/kernels/router/dense_router_decode_kernel.py b/dsv4/kernels/router/dense_router_decode_kernel.py index f1ebcb94..c44bfc97 100644 --- a/dsv4/kernels/router/dense_router_decode_kernel.py +++ b/dsv4/kernels/router/dense_router_decode_kernel.py @@ -109,6 +109,7 @@ class DenseRouterDecodeKernel: # All MLIR-dependent setup (tiled_mma, TMA atoms, CuTe tensor conversion) # must happen inside cute.compile context. This matches the MoE kernel pattern. + @cute.jit def _compiled_fn(X, W_gate, e_bias, out_w, out_ids): self._setup_attributes() tiled_mma = self._tiled_mma