#!/usr/bin/env python3 """Unit test for the NVFP4 weights mapper. Validates that checkpoint key names from our ModelOpt-quantized DeepSeek-V4-Pro checkpoint are correctly mapped to vLLM model parameter names. This can run WITHOUT vLLM or CUDA — it only tests the mapper logic. """ import re import sys from typing import Optional class WeightsMapper: """Simplified WeightsMapper for testing.""" def __init__( self, orig_to_new_prefix: Optional[dict] = None, orig_to_new_regex: Optional[dict] = None, orig_to_new_suffix: Optional[dict] = None, orig_to_new_substr: Optional[dict] = None, ): self.prefix_map = orig_to_new_prefix or {} self.regex_map = orig_to_new_regex or {} self.suffix_map = orig_to_new_suffix or {} self.substr_map = orig_to_new_substr or {} def map_name(self, name: str) -> str: # 1. Prefix for old, new in self.prefix_map.items(): if name.startswith(old): name = new + name[len(old):] break # 2. Regex for pattern, replacement in self.regex_map.items(): name = pattern.sub(replacement, name) # 3. Suffix for old, new in self.suffix_map.items(): if name.endswith(old): name = name[: -len(old)] + new break # 4. Substr (ordered dict — specific before general) for old, new in self.substr_map.items(): if old in name: name = name.replace(old, new, 1) return name def _make_deepseek_v4_nvfp4_weights_mapper() -> WeightsMapper: """Exact copy of the mapper from deepseek_v4.py.""" expert_rename_regex = { re.compile(r"(\.experts\.\d+\.)gate_proj\."): r"\1w1.", re.compile(r"(\.experts\.\d+\.)up_proj\."): r"\1w3.", re.compile(r"(\.experts\.\d+\.)down_proj\."): r"\1w2.", } suffix_renames = {} substr_renames = { # === Compressor (non-indexer) NVFP4 renames === "compressor.kv_proj.": "compressor.wkv.", "compressor.gate_proj.": "compressor.wgate.", "compressor.kv_norm.": "compressor.norm.", "compressor.position_bias": "compressor.ape", # === Attention compressor (before indexer renames) === ".self_attn.compressor.": ".attn.mla_attn.compressor.", # === Indexer params === "compressor.indexer.q_b_proj.": "indexer.wq_b.", "compressor.indexer.weights_proj.": "indexer.weights_proj.", "compressor.indexer.kv_norm.": "indexer.k_norm.", "compressor.indexer.kv_proj.": "indexer.compressor.wkv.", "compressor.indexer.gate_proj.": "indexer.compressor.wgate.", "compressor.indexer.position_bias": "indexer.compressor.ape", # === Attention projections === ".self_attn.q_a_proj.": ".attn.wq_a.", ".self_attn.kv_proj.": ".attn.wkv.", ".self_attn.q_b_proj.": ".attn.wq_b.", ".self_attn.o_a_proj.": ".attn.wo_a.", ".self_attn.o_b_proj.": ".attn.wo_b.", ".self_attn.q_a_norm.": ".attn.q_a_norm.", ".self_attn.kv_norm.": ".attn.kv_norm.", ".self_attn.sinks": ".attn.sinks", # Shared expert projections ".mlp.shared_experts.gate_proj.": ".ffn.shared_experts.w1.", ".mlp.shared_experts.up_proj.": ".ffn.shared_experts.w3.", ".mlp.shared_experts.down_proj.": ".ffn.shared_experts.down_proj.", # General renames ".mlp.": ".ffn.", ".self_attn.": ".attn.", } return WeightsMapper( orig_to_new_prefix={ "layers.": "model.layers.", "embed_tokens.": "model.embed_tokens.", "norm.": "model.norm.", "hc_head": "model.hc_head", "mtp.": "model.mtp.", }, orig_to_new_regex=expert_rename_regex, orig_to_new_suffix=suffix_renames, orig_to_new_substr=substr_renames, ) TEST_CASES = [ # Embedding & top-level ("embed_tokens.weight", "model.embed_tokens.weight"), ("norm.weight", "model.norm.weight"), ("hc_head.hc_fn", "model.hc_head.hc_fn"), ("hc_head.hc_base", "model.hc_head.hc_base"), ("hc_head.hc_scale", "model.hc_head.hc_scale"), ("lm_head.weight", "lm_head.weight"), # Attention — self_attn → attn ("layers.0.self_attn.q_a_proj.weight", "model.layers.0.attn.wq_a.weight"), ("layers.0.self_attn.q_a_proj.input_scale", "model.layers.0.attn.wq_a.input_scale"), ("layers.0.self_attn.kv_proj.weight", "model.layers.0.attn.wkv.weight"), ("layers.0.self_attn.q_b_proj.weight", "model.layers.0.attn.wq_b.weight"), ("layers.0.self_attn.o_a_proj.weight", "model.layers.0.attn.wo_a.weight"), ("layers.0.self_attn.o_b_proj.weight", "model.layers.0.attn.wo_b.weight"), ("layers.0.self_attn.o_b_proj.input_scale", "model.layers.0.attn.wo_b.input_scale"), ("layers.0.self_attn.q_a_norm.weight", "model.layers.0.attn.q_a_norm.weight"), ("layers.0.self_attn.kv_norm.weight", "model.layers.0.attn.kv_norm.weight"), ("layers.0.self_attn.sinks", "model.layers.0.attn.sinks"), # Compressor (non-indexer): kv_proj → wkv, gate_proj → wgate ("layers.0.self_attn.compressor.kv_proj.weight", "model.layers.0.attn.mla_attn.compressor.wkv.weight"), ("layers.0.self_attn.compressor.kv_proj.input_scale", "model.layers.0.attn.mla_attn.compressor.wkv.input_scale"), ("layers.0.self_attn.compressor.kv_proj.weight_scale", "model.layers.0.attn.mla_attn.compressor.wkv.weight_scale"), ("layers.0.self_attn.compressor.kv_proj.weight_scale_2", "model.layers.0.attn.mla_attn.compressor.wkv.weight_scale_2"), ("layers.0.self_attn.compressor.gate_proj.weight", "model.layers.0.attn.mla_attn.compressor.wgate.weight"), ("layers.0.self_attn.compressor.gate_proj.input_scale", "model.layers.0.attn.mla_attn.compressor.wgate.input_scale"), ("layers.0.self_attn.compressor.gate_proj.weight_scale", "model.layers.0.attn.mla_attn.compressor.wgate.weight_scale"), ("layers.0.self_attn.compressor.gate_proj.weight_scale_2", "model.layers.0.attn.mla_attn.compressor.wgate.weight_scale_2"), ("layers.0.self_attn.compressor.kv_norm.weight", "model.layers.0.attn.mla_attn.compressor.norm.weight"), ("layers.0.self_attn.compressor.position_bias", "model.layers.0.attn.mla_attn.compressor.ape"), # Indexer own params ("layers.10.self_attn.compressor.indexer.q_b_proj.weight", "model.layers.10.attn.mla_attn.indexer.wq_b.weight"), ("layers.10.self_attn.compressor.indexer.q_b_proj.input_scale", "model.layers.10.attn.mla_attn.indexer.wq_b.input_scale"), ("layers.10.self_attn.compressor.indexer.weights_proj.weight", "model.layers.10.attn.mla_attn.indexer.weights_proj.weight"), ("layers.10.self_attn.compressor.indexer.weights_proj.input_scale", "model.layers.10.attn.mla_attn.indexer.weights_proj.input_scale"), ("layers.10.self_attn.compressor.indexer.kv_norm.weight", "model.layers.10.attn.mla_attn.indexer.k_norm.weight"), # Indexer's compressor ("layers.10.self_attn.compressor.indexer.kv_proj.weight", "model.layers.10.attn.mla_attn.indexer.compressor.wkv.weight"), ("layers.10.self_attn.compressor.indexer.kv_proj.input_scale", "model.layers.10.attn.mla_attn.indexer.compressor.wkv.input_scale"), ("layers.10.self_attn.compressor.indexer.gate_proj.weight", "model.layers.10.attn.mla_attn.indexer.compressor.wgate.weight"), ("layers.10.self_attn.compressor.indexer.gate_proj.input_scale", "model.layers.10.attn.mla_attn.indexer.compressor.wgate.input_scale"), ("layers.10.self_attn.compressor.indexer.position_bias", "model.layers.10.attn.mla_attn.indexer.compressor.ape"), # MoE gate ("layers.0.mlp.gate.tid2eid", "model.layers.0.ffn.gate.tid2eid"), ("layers.0.mlp.gate.weight", "model.layers.0.ffn.gate.weight"), # Expert weights — gate_proj → w1, up_proj → w3, down_proj → w2 ("layers.0.mlp.experts.0.gate_proj.weight", "model.layers.0.ffn.experts.0.w1.weight"), ("layers.0.mlp.experts.0.up_proj.weight", "model.layers.0.ffn.experts.0.w3.weight"), ("layers.0.mlp.experts.0.down_proj.weight", "model.layers.0.ffn.experts.0.w2.weight"), ("layers.0.mlp.experts.0.gate_proj.input_scale", "model.layers.0.ffn.experts.0.w1.input_scale"), ("layers.0.mlp.experts.0.gate_proj.weight_scale", "model.layers.0.ffn.experts.0.w1.weight_scale"), ("layers.0.mlp.experts.0.gate_proj.weight_scale_2", "model.layers.0.ffn.experts.0.w1.weight_scale_2"), # Shared experts ("layers.0.mlp.shared_experts.gate_proj.weight", "model.layers.0.ffn.shared_experts.w1.weight"), ("layers.0.mlp.shared_experts.up_proj.weight", "model.layers.0.ffn.shared_experts.w3.weight"), ("layers.0.mlp.shared_experts.down_proj.weight", "model.layers.0.ffn.shared_experts.down_proj.weight"), ("layers.0.mlp.shared_experts.gate_proj.input_scale", "model.layers.0.ffn.shared_experts.w1.input_scale"), ("layers.0.mlp.shared_experts.down_proj.weight_scale", "model.layers.0.ffn.shared_experts.down_proj.weight_scale"), # Layer norm ("layers.0.post_attention_layernorm.weight", "model.layers.0.post_attention_layernorm.weight"), ] def main(): mapper = _make_deepseek_v4_nvfp4_weights_mapper() passed = 0 failed = 0 for ckpt_key, expected in TEST_CASES: result = mapper.map_name(ckpt_key) if result == expected: passed += 1 else: failed += 1 print(f"FAIL: {ckpt_key}") print(f" expected: {expected}") print(f" got: {result}") print(f"\n{passed} passed, {failed} failed") return 0 if failed == 0 else 1 if __name__ == "__main__": sys.exit(main())