fix: add @cute.jit to router compiled function

This commit is contained in:
2026-05-31 23:44:53 +00:00
parent d5d2b7b4b8
commit cb2ca8591f

View File

@@ -109,6 +109,7 @@ class DenseRouterDecodeKernel:
# All MLIR-dependent setup (tiled_mma, TMA atoms, CuTe tensor conversion)
# must happen inside the cute.compile context. This matches the MoE kernel pattern.
@cute.jit
def _compiled_fn(X, W_gate, e_bias, out_w, out_ids):
self._setup_attributes()
tiled_mma = self._tiled_mma