Files
nvfp4-megamoe-kernel/dump_checkpoint_keys.py
2026-05-31 21:26:04 +00:00

38 lines
1.3 KiB
Python

#!/usr/bin/env python3
"""Dump checkpoint key names for layers 0, 2, 60 — non-expert only."""
import json
from pathlib import Path
# Checkpoint directory; main() reads "model.safetensors.index.json" from it.
CHECKPOINT_DIR = "/root/nvidia-meeting/DeepSeek-V4-Pro"
def main(checkpoint_dir=None, layers=(0, 1, 2, 3, 59, 60)):
    """Print selected key names from a checkpoint's safetensors index.

    Reads ``model.safetensors.index.json`` from the checkpoint directory and
    prints three sections, in this order:

    1. For each requested layer, its sorted keys that do not contain
       ``.experts.``.
    2. The layer-0 keys containing ``shared_expert``, ``hc`` or ``ffn_norm``.
    3. Every key that does not start with ``layers.``.

    If the index has no ``weight_map`` entry, only the section headers are
    printed.

    Args:
        checkpoint_dir: Directory holding the index file (``str`` or
            ``Path``). ``None`` (the default) falls back to the module-level
            ``CHECKPOINT_DIR``.
        layers: Iterable of layer indices to dump in section 1.

    Raises:
        FileNotFoundError: If the index file does not exist.
        json.JSONDecodeError: If the index file is not valid JSON.
    """
    if checkpoint_dir is None:
        # Resolved at call time so the module constant stays the default.
        checkpoint_dir = CHECKPOINT_DIR
    index_path = Path(checkpoint_dir) / "model.safetensors.index.json"

    # JSON is UTF-8 by spec; do not depend on the platform's locale encoding.
    with index_path.open(encoding="utf-8") as f:
        weight_map = json.load(f).get("weight_map", {})

    # Sort once: every filtered view below inherits this ordering.
    all_keys = sorted(weight_map)

    for li in layers:
        prefix = f"layers.{li}."
        filtered = [
            k for k in all_keys
            if k.startswith(prefix) and ".experts." not in k
        ]
        print(f"\n=== Layer {li} non-expert keys ({len(filtered)}) ===")
        for k in filtered:
            print(f" {k}")

    # Layer-0 extras. This is a plain substring match, so "hc" also selects
    # any key whose name merely contains those two letters (e.g. "mhc").
    se_keys = [
        k for k in all_keys
        if k.startswith("layers.0.")
        and ("shared_expert" in k or "hc" in k or "ffn_norm" in k)
    ]
    print("\n=== Layer 0 shared_expert + hc + ffn_norm keys ===")
    for k in se_keys:
        print(f" {k}")

    # Global keys: everything that does not live under a "layers." prefix.
    other = [k for k in all_keys if not k.startswith("layers.")]
    print("\n=== Global keys ===")
    for k in other:
        print(f" {k}")
# Run the dump only when executed as a script, not when imported.
if __name__ == "__main__":
    main()