diag: remove format=5 override, keep block_m=128 baseline test

This commit is contained in:
2026-05-12 20:01:37 +00:00
parent 2c09545faa
commit 4442c06ba8

View File

@@ -851,20 +851,21 @@ sm100_fp8_nvfp4_mega_moe_impl(void* y,
// DIAGNOSTIC: Force-override instr_desc bitfields
// Test 1: Force a_format/b_format to 5 (MXF8F6F4Format::E2M1 encoding)
// MXF4Format::E2M1=1 but MXF8F6F4Format::E2M1=5 — hardware may expect 5
// RESULT: format=5 makes no difference, disabled
// Test 2: Force scale_format to 1 (E8M0) to see if bit 23 matters
// Test 3: a_sf_id/b_sf_id already set by make_runtime_instr_desc_with_sf_id
{
uint32_t raw = static_cast<uint32_t>(instr_desc);
// Clear a_format [7,10) and b_format [10,13), then OR in 5 for both
raw = (raw & ~((0x7u << 7) | (0x7u << 10))) | (5u << 7) | (5u << 10);
// Force scale_format bit [23] to 1 (E8M0)
// raw |= (1u << 23); // uncomment to test scale_fmt=1
instr_desc = *reinterpret_cast<cute::UMMA::InstrDescriptorBlockScaled*>(&raw);
if (lane_idx == 0) {
printf("[DIAG-FORCE] after override: raw=0x%08x a_fmt=%u b_fmt=%u scale_fmt=%u\n",
raw, (raw >> 7) & 7, (raw >> 10) & 7, (raw >> 23) & 1);
}
}
// {
// uint32_t raw = static_cast<uint32_t>(instr_desc);
// // Clear a_format [7,10) and b_format [10,13), then OR in 5 for both
// raw = (raw & ~((0x7u << 7) | (0x7u << 10))) | (5u << 7) | (5u << 10);
// // Force scale_format bit [23] to 1 (E8M0)
// // raw |= (1u << 23); // uncomment to test scale_fmt=1
// instr_desc = *reinterpret_cast<cute::UMMA::InstrDescriptorBlockScaled*>(&raw);
// if (lane_idx == 0) {
// printf("[DIAG-FORCE] after override: raw=0x%08x a_fmt=%u b_fmt=%u scale_fmt=%u\n",
// raw, (raw >> 7) & 7, (raw >> 10) & 7, (raw >> 23) & 1);
// }
// }
// Wait tensor memory empty barrier arrival
const auto accum_stage_idx = current_iter_idx % kNumEpilogueStages;