Add resume capability to dequant script (skip already-done shards)

Verified that our FP4 dequantization is byte-identical to the official
transformers MXFP4 implementation: max diff = 0.0 across all values.
This commit is contained in:
2026-05-08 02:58:24 +00:00
parent f63eed5cfd
commit b70a04696e

View File

@@ -210,6 +210,11 @@ def dequantize_model(model_dir: str, out_dir: str):
stats["scales_removed"] += 1
out_path = os.path.join(out_dir, os.path.basename(f))
if os.path.exists(out_path) and os.path.getsize(out_path) > 0:
# Resume: skip already-dequantized shards
print(f"[{i+1}/{total_shards}] Skipping (already done): {os.path.basename(f)}")
del tensors, scales_in_shard
continue
save_file(tensors, out_path)
shard_time = time.time() - shard_start