diff --git a/csrc/jit_kernels/heuristics/mega_moe.hpp b/csrc/jit_kernels/heuristics/mega_moe.hpp index 9a7e2c8..b376a70 100644 --- a/csrc/jit_kernels/heuristics/mega_moe.hpp +++ b/csrc/jit_kernels/heuristics/mega_moe.hpp @@ -82,6 +82,13 @@ static std::tuple get_block_config_for_mega_moe( } }(); + // DIAGNOSTIC: Force block_m=128 for NVFP4 mxf4nvf4 testing + // 192 may not be a valid UMMA_N for mxf4nvf4 hardware + const int forced_block_m = 128; + const int forced_store_block_m = 32; + const int forced_epilogue_threads = 256; + return {2, forced_block_m, forced_store_block_m, forced_epilogue_threads}; + // Check whether our `block_m` lies in `kCandidateBlockM` DG_HOST_ASSERT(std::any_of( layout::kCandidateBlockM, layout::kCandidateBlockM + layout::kNumCandidateBlockMs,