diag: force block_m=128 to test UMMA_N=192 validity for mxf4nvf4

This commit is contained in:
2026-05-12 19:37:11 +00:00
parent c1cbe488f3
commit 2c09545faa

View File

@@ -82,6 +82,13 @@ static std::tuple<int, int, int, int> get_block_config_for_mega_moe(
}
}();
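// DIAGNOSTIC ONLY (temporary): force block_m=128 for NVFP4 (mxf4nvf4) testing.
// Hypothesis under test: 192 may not be a valid UMMA_N for mxf4nvf4 hardware.
// NOTE(review): the return below appears to be unconditional, so the `block_m`
// candidate check that follows it would never run while this override is in place.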
const int forced_block_m = 128;
const int forced_store_block_m = 32;
const int forced_epilogue_threads = 256;
return {2, forced_block_m, forced_store_block_m, forced_epilogue_threads};
// Check whether our `block_m` lies in `kCandidateBlockM`
DG_HOST_ASSERT(std::any_of(
layout::kCandidateBlockM, layout::kCandidateBlockM + layout::kNumCandidateBlockMs,