From 2c09545faae430cf892759d79edfd7b61d7ae746 Mon Sep 17 00:00:00 2001 From: biondizzle Date: Tue, 12 May 2026 19:37:11 +0000 Subject: [PATCH] diag: force block_m=128 to test UMMA_N=192 validity for mxf4nvf4 --- csrc/jit_kernels/heuristics/mega_moe.hpp | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/csrc/jit_kernels/heuristics/mega_moe.hpp b/csrc/jit_kernels/heuristics/mega_moe.hpp index 9a7e2c8..b376a70 100644 --- a/csrc/jit_kernels/heuristics/mega_moe.hpp +++ b/csrc/jit_kernels/heuristics/mega_moe.hpp @@ -82,6 +82,13 @@ static std::tuple get_block_config_for_mega_moe( } }(); + // DIAGNOSTIC: Force block_m=128 for NVFP4 mxf4nvf4 testing + // 192 may not be a valid UMMA_N for mxf4nvf4 hardware + const int forced_block_m = 128; + const int forced_store_block_m = 32; + const int forced_epilogue_threads = 256; + return {2, forced_block_m, forced_store_block_m, forced_epilogue_threads}; + // Check whether our `block_m` lies in `kCandidateBlockM` DG_HOST_ASSERT(std::any_of( layout::kCandidateBlockM, layout::kCandidateBlockM + layout::kNumCandidateBlockMs,