debug: synchronize the device and print launch arguments before the mega_moe kernel launch
This commit is contained in:
@@ -89,6 +89,12 @@ static void __instantiate_kernel() {{
|
||||
}
|
||||
|
||||
static void launch_impl(const KernelHandle& kernel, const LaunchConfigHandle& config, Args args) {
|
||||
// Debug: when DG_JIT_DEBUG is set, synchronize the device before launching so that pending errors (e.g. from TMA creation) surface here
|
||||
if (get_env<int>("DG_JIT_DEBUG")) {
|
||||
DG_CUDA_CHECK(cudaDeviceSynchronize());
|
||||
printf("[MEGA_MOE_LAUNCH_DEBUG] About to launch kernel, y=%p, num_tokens=%d\n", args.y, args.num_tokens);
|
||||
fflush(stdout);
|
||||
}
|
||||
DG_CUDA_UNIFIED_CHECK(launch_kernel(kernel, config,
|
||||
args.y,
|
||||
args.cumulative_local_expert_recv_stats,
|
||||
|
||||
Reference in New Issue
Block a user