38 lines
1.3 KiB
Python
38 lines
1.3 KiB
Python
#!/usr/bin/env python3
|
|
"""Dump checkpoint key names for layers 0, 2, 60 — non-expert only."""
|
|
import json
|
|
from pathlib import Path
|
|
|
|
# Checkpoint directory; main() reads "model.safetensors.index.json" from it.
CHECKPOINT_DIR = "/root/nvidia-meeting/DeepSeek-V4-Pro"
|
|
|
|
def _print_section(title, keys):
    """Print a blank line, a ``=== title ===`` banner, then one key per line."""
    print(f"\n=== {title} ===")
    for key in keys:
        print(f" {key}")


def main(checkpoint_dir=None):
    """Print selected tensor key names from a checkpoint's safetensors index.

    Reads ``model.safetensors.index.json`` from the checkpoint directory and
    prints, in sorted order:

    * for each of layers 0, 1, 2, 3, 59 and 60, every key starting with
      ``layers.<i>.`` that does not contain ``.experts.``;
    * the layer-0 keys containing ``shared_expert``, ``hc`` or ``ffn_norm``;
    * every key that does not start with ``layers.``.

    Args:
        checkpoint_dir: Directory holding the index file. When ``None``
            (the default), the module-level ``CHECKPOINT_DIR`` is used.

    Raises:
        FileNotFoundError: If the index file does not exist.
        json.JSONDecodeError: If the index file is not valid JSON.
    """
    if checkpoint_dir is None:
        checkpoint_dir = CHECKPOINT_DIR
    index_path = Path(checkpoint_dir) / "model.safetensors.index.json"

    # JSON is UTF-8 by specification; do not depend on the locale default.
    with open(index_path, encoding="utf-8") as f:
        weight_map = json.load(f).get("weight_map", {})

    # Sort once; every filtered view below keeps this order.
    all_keys = sorted(weight_map)

    for li in (0, 1, 2, 3, 59, 60):
        # The trailing dot keeps "layers.1." from matching "layers.10.".
        prefix = f"layers.{li}."
        filtered = [
            k for k in all_keys
            if k.startswith(prefix) and ".experts." not in k
        ]
        _print_section(f"Layer {li} non-expert keys ({len(filtered)})", filtered)

    # Layer-0 keys of special interest. The "hc" substring test also matches
    # any key containing "mhc". "shared_experts" keys are not excluded by the
    # ".experts." filter above, so they appear in both sections.
    markers = ("shared_expert", "hc", "ffn_norm")
    se_keys = [
        k for k in all_keys
        if k.startswith("layers.0.") and any(m in k for m in markers)
    ]
    _print_section("Layer 0 shared_expert + hc + ffn_norm keys", se_keys)

    # Non-layer global keys.
    other = [k for k in all_keys if not k.startswith("layers.")]
    _print_section("Global keys", other)
|
|
|
|
# Script entry point: dump the keys only when executed directly, not on import.
if __name__ == "__main__":
    main()
|