diag: remove format=5 override, keep block_m=128 baseline test
This commit is contained in:
@@ -851,20 +851,21 @@ sm100_fp8_nvfp4_mega_moe_impl(void* y,
|
||||
// DIAGNOSTIC: Force-override instr_desc bitfields
|
||||
// Test 1: Force a_format/b_format to 5 (MXF8F6F4Format::E2M1 encoding)
|
||||
// MXF4Format::E2M1=1 but MXF8F6F4Format::E2M1=5 — hardware may expect 5
|
||||
// RESULT: forcing format=5 made no difference, so the Test 1 override is disabled (left commented out below)
|
||||
// Test 2: Force scale_format to 1 (E8M0) to see if bit 23 matters
|
||||
// Test 3: a_sf_id/b_sf_id already set by make_runtime_instr_desc_with_sf_id
|
||||
{
|
||||
uint32_t raw = static_cast<uint32_t>(instr_desc);
|
||||
// Clear a_format [7,10) and b_format [10,13), then OR in 5 for both
|
||||
raw = (raw & ~((0x7u << 7) | (0x7u << 10))) | (5u << 7) | (5u << 10);
|
||||
// Optional (Test 2): force scale_format bit [23] to 1 (E8M0); off unless the next line is uncommented
|
||||
// raw |= (1u << 23); // uncomment to test scale_fmt=1
|
||||
instr_desc = *reinterpret_cast<cute::UMMA::InstrDescriptorBlockScaled*>(&raw);
|
||||
if (lane_idx == 0) {
|
||||
printf("[DIAG-FORCE] after override: raw=0x%08x a_fmt=%u b_fmt=%u scale_fmt=%u\n",
|
||||
raw, (raw >> 7) & 7, (raw >> 10) & 7, (raw >> 23) & 1);
|
||||
}
|
||||
}
|
||||
// {
|
||||
// uint32_t raw = static_cast<uint32_t>(instr_desc);
|
||||
// // Clear a_format [7,10) and b_format [10,13), then OR in 5 for both
|
||||
// raw = (raw & ~((0x7u << 7) | (0x7u << 10))) | (5u << 7) | (5u << 10);
|
||||
// // Optional (Test 2): force scale_format bit [23] to 1 (E8M0); off unless the next line is uncommented
|
||||
// // raw |= (1u << 23); // uncomment to test scale_fmt=1
|
||||
// instr_desc = *reinterpret_cast<cute::UMMA::InstrDescriptorBlockScaled*>(&raw);
|
||||
// if (lane_idx == 0) {
|
||||
// printf("[DIAG-FORCE] after override: raw=0x%08x a_fmt=%u b_fmt=%u scale_fmt=%u\n",
|
||||
// raw, (raw >> 7) & 7, (raw >> 10) & 7, (raw >> 23) & 1);
|
||||
// }
|
||||
// }
|
||||
|
||||
// Wait tensor memory empty barrier arrival
|
||||
const auto accum_stage_idx = current_iter_idx % kNumEpilogueStages;
|
||||
|
||||
Reference in New Issue
Block a user