fix: add @cute.jit to router compiled function
This commit is contained in:
@@ -109,6 +109,7 @@ class DenseRouterDecodeKernel:
 
 # All MLIR-dependent setup (tiled_mma, TMA atoms, CuTe tensor conversion)
 # must happen inside cute.compile context. This matches the MoE kernel pattern.
+@cute.jit
 def _compiled_fn(X, W_gate, e_bias, out_w, out_ids):
     self._setup_attributes()
     tiled_mma = self._tiled_mma
Reference in New Issue
Block a user