From cec17fee7db2e4595f1497302e76b039461e66f0 Mon Sep 17 00:00:00 2001 From: biondizzle Date: Sun, 31 May 2026 21:26:04 +0000 Subject: [PATCH] fixed prefix --- dump_checkpoint_keys.py | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/dump_checkpoint_keys.py b/dump_checkpoint_keys.py index a605441c..dfe95903 100644 --- a/dump_checkpoint_keys.py +++ b/dump_checkpoint_keys.py @@ -1,8 +1,7 @@ #!/usr/bin/env python3 -"""Dump checkpoint key names for specific layers only.""" +"""Dump non-expert checkpoint key names for layers 0, 1, 2, 3, 59, 60, plus layer-0 shared_expert/hc/ffn_norm keys and global keys.""" import json from pathlib import Path -from safetensors.torch import load_file CHECKPOINT_DIR = "/root/nvidia-meeting/DeepSeek-V4-Pro" @@ -12,20 +11,26 @@ def main(): with open(index_path) as f: weight_map = json.load(f).get("weight_map", {}) - # Only show layer 0, 2, and 60 keys (non-expert) - for li in [0, 2, 60]: - prefix = f"model.layers.{li}." + for li in [0, 1, 2, 3, 59, 60]: + prefix = f"layers.{li}." keys = sorted(k for k in weight_map if k.startswith(prefix)) - # Filter out individual expert weights filtered = [k for k in keys if '.experts.' not in k] - print(f"\n=== Layer {li} keys ({len(filtered)} non-expert) ===") + print(f"\n=== Layer {li} non-expert keys ({len(filtered)}) ===") for k in filtered: print(f" {k}") - # Non-layer keys (short list) - other_keys = sorted(k for k in weight_map if not k.startswith("model.layers.")) - print(f"\n=== Non-layer keys ({len(other_keys)}) ===") - for k in other_keys: + # Also: layer 0 keys containing 'shared_expert', 'hc' (substring match, so mhc too), or 'ffn_norm' + prefix0 = "layers.0." 
+ keys0 = sorted(k for k in weight_map if k.startswith(prefix0)) + se_keys = [k for k in keys0 if 'shared_expert' in k or 'hc' in k or 'ffn_norm' in k] + print(f"\n=== Layer 0 shared_expert + hc + ffn_norm keys ===") + for k in se_keys: + print(f" {k}") + + # Non-layer global keys + other = sorted(k for k in weight_map if not k.startswith("layers.")) + print(f"\n=== Global keys ===") + for k in other: print(f" {k}") if __name__ == "__main__":