fix: make a contiguous copy of SF before taking the uint8 byte view in the sanity check
This commit is contained in:
@@ -351,7 +351,9 @@ def fp8_nvfp4_mega_moe(y: torch.Tensor,
|
||||
zero_pct = (sf == 0).float().mean().item() * 100
|
||||
if zero_pct > 50:
|
||||
print(f"[WARN] {name}: {zero_pct:.1f}% zeros in SF! Possible div-by-zero", flush=True)
|
||||
sf_u8 = sf.view(torch.uint8)
|
||||
# Need contiguous for byte view — make a contiguous copy just for the check
|
||||
sf_c = sf.contiguous()
|
||||
sf_u8 = sf_c.view(torch.uint8)
|
||||
nan_count = (sf_u8 == 0x7F).sum().item()
|
||||
inf_count = (sf_u8 == 0x7E).sum().item()
|
||||
if nan_count > 0 or inf_count > 0:
|
||||
|
||||
Reference in New Issue
Block a user