diff --git a/patches/deepseek_v4.py b/patches/deepseek_v4.py
index 8878c4d..59da91b 100644
--- a/patches/deepseek_v4.py
+++ b/patches/deepseek_v4.py
@@ -1950,6 +1950,18 @@ class DeepseekV4Model(nn.Module):
             # scale_fmt=ue8m0: weight_scale bytes are E8M0 format (power-of-2 only).
             # A simple .to(float32) misinterprets them as E4M3. Must reinterpret
             # the raw uint8 bits as IEEE 754 exponent field.
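+            # Diagnostic: histogram of the first 256 raw weight_scale bytes (viewed as uint8), plus min/max/mean/std over all bytes.
+            # E8M0 bytes are biased exponents, so they cluster narrowly (120-130 = 2^-7 to 2^3).
+            # E4M3 bytes would instead spread across 0-127, because their low bits carry mantissa.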
+            ws_u8 = mod.weight_scale.data.view(torch.uint8)
+            ws_u8_flat = ws_u8.flatten()[:256].cpu().numpy()
+            import numpy as _np
+            _hist, _edges = _np.histogram(ws_u8_flat, bins=16, range=(0, 255))
+            _prefix = getattr(mod, 'prefix', 'unknown')
+            print(f"[SCALE-FMT] {_prefix}: uint8 histogram (first 256 vals): "
+                  f"{list(zip(_edges.astype(int), _hist))}")
+            print(f"[SCALE-FMT] {_prefix}: uint8 min={ws_u8.min().item()} max={ws_u8.max().item()} "
+                  f"mean={ws_u8.float().mean().item():.1f} std={ws_u8.float().std().item():.1f}")
             block_scale = self._ue8m0_to_float32(mod.weight_scale.data)
             if block_scale.dim() == 2 and w_bf16.dim() == 2:
                 block_size = w_bf16.shape[1] // block_scale.shape[1]