fix: contiguous copy for SF byte view sanity check

This commit is contained in:
2026-05-12 15:44:50 +00:00
parent cc3e3da45c
commit 758389645a

View File

@@ -351,7 +351,9 @@ def fp8_nvfp4_mega_moe(y: torch.Tensor,
zero_pct = (sf == 0).float().mean().item() * 100
if zero_pct > 50:
print(f"[WARN] {name}: {zero_pct:.1f}% zeros in SF! Possible div-by-zero", flush=True)
sf_u8 = sf.view(torch.uint8)
# Need contiguous for byte view — make a contiguous copy just for the check
sf_c = sf.contiguous()
sf_u8 = sf_c.view(torch.uint8)
nan_count = (sf_u8 == 0x7F).sum().item()
inf_count = (sf_u8 == 0x7E).sum().item()
if nan_count > 0 or inf_count > 0: