From 758389645a400e56f6a4ce2f0af3a3e0e1519316 Mon Sep 17 00:00:00 2001 From: biondizzle Date: Tue, 12 May 2026 15:44:50 +0000 Subject: [PATCH] fix: contiguous copy for SF byte view sanity check --- deep_gemm/mega/__init__.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/deep_gemm/mega/__init__.py b/deep_gemm/mega/__init__.py index 9d954dd..c0e81cb 100644 --- a/deep_gemm/mega/__init__.py +++ b/deep_gemm/mega/__init__.py @@ -351,7 +351,9 @@ def fp8_nvfp4_mega_moe(y: torch.Tensor, zero_pct = (sf == 0).float().mean().item() * 100 if zero_pct > 50: print(f"[WARN] {name}: {zero_pct:.1f}% zeros in SF! Possible div-by-zero", flush=True) - sf_u8 = sf.view(torch.uint8) + # Byte view needs a contiguous layout; .contiguous() copies only when sf is not already contiguous (used just for this check) + sf_c = sf.contiguous() + sf_u8 = sf_c.view(torch.uint8) nan_count = (sf_u8 == 0x7F).sum().item() inf_count = (sf_u8 == 0x7E).sum().item() if nan_count > 0 or inf_count > 0: