debug: add device sync + printf before mega_moe kernel launch (gated by DG_JIT_DEBUG)

This commit is contained in:
2026-05-13 12:15:49 +00:00
parent ad335c38fb
commit c08a28888d

View File

@@ -89,6 +89,12 @@ static void __instantiate_kernel() {{
}
static void launch_impl(const KernelHandle& kernel, const LaunchConfigHandle& config, Args args) {
// Debug (only when DG_JIT_DEBUG is set): synchronize the device before launch to flush TMA creation errors, then log the launch arguments
if (get_env<int>("DG_JIT_DEBUG")) {
DG_CUDA_CHECK(cudaDeviceSynchronize());
printf("[MEGA_MOE_LAUNCH_DEBUG] About to launch kernel, y=%p, num_tokens=%d\n", args.y, args.num_tokens);
fflush(stdout);
}
DG_CUDA_UNIFIED_CHECK(launch_kernel(kernel, config,
args.y,
args.cumulative_local_expert_recv_stats,