Files
nvfp4-megamoe-kernel/tests/test_nvfp4_mapper.py

205 lines
9.5 KiB
Python
Raw Normal View History

#!/usr/bin/env python3
"""Unit test for the NVFP4 weights mapper.
Validates that checkpoint key names from our ModelOpt-quantized
DeepSeek-V4-Pro checkpoint are correctly mapped to vLLM model
parameter names.
This can run WITHOUT vLLM or CUDA — it only tests the mapper logic.
"""
import re
import sys
from typing import Optional
class WeightsMapper:
    """Simplified WeightsMapper for testing.

    Rewrites a key name in four passes, always in this order:
    prefix, regex, suffix, substring. Each pass iterates its mapping in
    insertion order, so the order of the dict entries is significant.
    """

    def __init__(
        self,
        orig_to_new_prefix: Optional[dict] = None,
        orig_to_new_regex: Optional[dict] = None,
        orig_to_new_suffix: Optional[dict] = None,
        orig_to_new_substr: Optional[dict] = None,
    ):
        """Store the four rename tables; a missing table becomes ``{}``.

        Args:
            orig_to_new_prefix: old prefix -> new prefix.
            orig_to_new_regex: compiled pattern (anything with a
                ``.sub(repl, string)`` method) -> replacement template.
            orig_to_new_suffix: old suffix -> new suffix.
            orig_to_new_substr: old substring -> new substring.
        """
        self.prefix_map = orig_to_new_prefix or {}
        self.regex_map = orig_to_new_regex or {}
        self.suffix_map = orig_to_new_suffix or {}
        self.substr_map = orig_to_new_substr or {}

    def map_name(self, name: str) -> str:
        """Return ``name`` after applying all four rename passes.

        Args:
            name: the key to translate.

        Returns:
            The rewritten name; unchanged when no rule matches.
        """
        # 1. Prefix — only the first matching prefix is applied.
        for old, new in self.prefix_map.items():
            if name.startswith(old):
                name = new + name[len(old):]
                break
        # 2. Regex — every pattern is applied, each to the previous result.
        for pattern, replacement in self.regex_map.items():
            name = pattern.sub(replacement, name)
        # 3. Suffix — only the first matching suffix is applied.
        for old, new in self.suffix_map.items():
            if name.endswith(old):
                # Slice with an explicit end index: ``name[:-len(old)]``
                # collapses to ``name[:0]`` (empty) when ``old`` is "".
                name = name[: len(name) - len(old)] + new
                break
        # 4. Substr (ordered dict — specific before general). No break:
        #    every matching entry rewrites its first occurrence, in order.
        for old, new in self.substr_map.items():
            if old in name:
                name = name.replace(old, new, 1)
        return name
def _make_deepseek_v4_nvfp4_weights_mapper() -> WeightsMapper:
    """Build the key-name mapper exercised by this test.

    Described by the original author as an exact copy of the mapper in
    deepseek_v4.py (not verifiable from this file alone).

    The substring table is order-sensitive: WeightsMapper.map_name applies
    every matching entry in insertion order, so the sections below are
    merged from most specific to most general.
    """
    # Per-expert projections: gate_proj/up_proj/down_proj -> w1/w3/w2.
    per_expert_patterns = {
        re.compile(r"(\.experts\.\d+\.)gate_proj\."): r"\1w1.",
        re.compile(r"(\.experts\.\d+\.)up_proj\."): r"\1w3.",
        re.compile(r"(\.experts\.\d+\.)down_proj\."): r"\1w2.",
    }

    # === Compressor (non-indexer) NVFP4 renames ===
    compressor_leaves = {
        "compressor.kv_proj.": "compressor.wkv.",
        "compressor.gate_proj.": "compressor.wgate.",
        "compressor.kv_norm.": "compressor.norm.",
        "compressor.position_bias": "compressor.ape",
    }
    # === Attention compressor (before indexer renames) ===
    attn_compressor = {
        ".self_attn.compressor.": ".attn.mla_attn.compressor.",
    }
    # === Indexer params ===
    indexer = {
        "compressor.indexer.q_b_proj.": "indexer.wq_b.",
        "compressor.indexer.weights_proj.": "indexer.weights_proj.",
        "compressor.indexer.kv_norm.": "indexer.k_norm.",
        "compressor.indexer.kv_proj.": "indexer.compressor.wkv.",
        "compressor.indexer.gate_proj.": "indexer.compressor.wgate.",
        "compressor.indexer.position_bias": "indexer.compressor.ape",
    }
    # === Attention projections ===
    attention = {
        ".self_attn.q_a_proj.": ".attn.wq_a.",
        ".self_attn.kv_proj.": ".attn.wkv.",
        ".self_attn.q_b_proj.": ".attn.wq_b.",
        ".self_attn.o_a_proj.": ".attn.wo_a.",
        ".self_attn.o_b_proj.": ".attn.wo_b.",
        ".self_attn.q_a_norm.": ".attn.q_a_norm.",
        ".self_attn.kv_norm.": ".attn.kv_norm.",
        ".self_attn.sinks": ".attn.sinks",
    }
    # Shared expert projections
    # NOTE(review): down_proj keeps its name here while gate/up become
    # w1/w3 — matches the expectations in TEST_CASES; confirm against model.
    shared_experts = {
        ".mlp.shared_experts.gate_proj.": ".ffn.shared_experts.w1.",
        ".mlp.shared_experts.up_proj.": ".ffn.shared_experts.w3.",
        ".mlp.shared_experts.down_proj.": ".ffn.shared_experts.down_proj.",
    }
    # General renames — last, so the specific entries above win.
    general = {
        ".mlp.": ".ffn.",
        ".self_attn.": ".attn.",
    }

    # Keys are unique across sections, so unpacking preserves the
    # section-by-section insertion order.
    substring_table = {
        **compressor_leaves,
        **attn_compressor,
        **indexer,
        **attention,
        **shared_experts,
        **general,
    }

    top_level_prefixes = {
        "layers.": "model.layers.",
        "embed_tokens.": "model.embed_tokens.",
        "norm.": "model.norm.",
        "hc_head": "model.hc_head",
        "mtp.": "model.mtp.",
    }

    return WeightsMapper(
        orig_to_new_prefix=top_level_prefixes,
        orig_to_new_regex=per_expert_patterns,
        orig_to_new_suffix={},  # no suffix renames are defined
        orig_to_new_substr=substring_table,
    )
# Table of (checkpoint key, expected mapped name) pairs. main() feeds the
# first element of each pair to the mapper and compares the result with the
# second element using exact string equality.
TEST_CASES = [
    # Embedding & top-level
    ("embed_tokens.weight", "model.embed_tokens.weight"),
    ("norm.weight", "model.norm.weight"),
    ("hc_head.hc_fn", "model.hc_head.hc_fn"),
    ("hc_head.hc_base", "model.hc_head.hc_base"),
    ("hc_head.hc_scale", "model.hc_head.hc_scale"),
    # Expected to pass through unchanged.
    ("lm_head.weight", "lm_head.weight"),
    # Attention — self_attn → attn
    ("layers.0.self_attn.q_a_proj.weight", "model.layers.0.attn.wq_a.weight"),
    ("layers.0.self_attn.q_a_proj.input_scale", "model.layers.0.attn.wq_a.input_scale"),
    ("layers.0.self_attn.kv_proj.weight", "model.layers.0.attn.wkv.weight"),
    ("layers.0.self_attn.q_b_proj.weight", "model.layers.0.attn.wq_b.weight"),
    ("layers.0.self_attn.o_a_proj.weight", "model.layers.0.attn.wo_a.weight"),
    ("layers.0.self_attn.o_b_proj.weight", "model.layers.0.attn.wo_b.weight"),
    ("layers.0.self_attn.o_b_proj.input_scale", "model.layers.0.attn.wo_b.input_scale"),
    ("layers.0.self_attn.q_a_norm.weight", "model.layers.0.attn.q_a_norm.weight"),
    ("layers.0.self_attn.kv_norm.weight", "model.layers.0.attn.kv_norm.weight"),
    ("layers.0.self_attn.sinks", "model.layers.0.attn.sinks"),
    # Compressor (non-indexer): kv_proj → wkv, gate_proj → wgate
    ("layers.0.self_attn.compressor.kv_proj.weight", "model.layers.0.attn.mla_attn.compressor.wkv.weight"),
    ("layers.0.self_attn.compressor.kv_proj.input_scale", "model.layers.0.attn.mla_attn.compressor.wkv.input_scale"),
    ("layers.0.self_attn.compressor.kv_proj.weight_scale", "model.layers.0.attn.mla_attn.compressor.wkv.weight_scale"),
    ("layers.0.self_attn.compressor.kv_proj.weight_scale_2", "model.layers.0.attn.mla_attn.compressor.wkv.weight_scale_2"),
    ("layers.0.self_attn.compressor.gate_proj.weight", "model.layers.0.attn.mla_attn.compressor.wgate.weight"),
    ("layers.0.self_attn.compressor.gate_proj.input_scale", "model.layers.0.attn.mla_attn.compressor.wgate.input_scale"),
    ("layers.0.self_attn.compressor.gate_proj.weight_scale", "model.layers.0.attn.mla_attn.compressor.wgate.weight_scale"),
    ("layers.0.self_attn.compressor.gate_proj.weight_scale_2", "model.layers.0.attn.mla_attn.compressor.wgate.weight_scale_2"),
    ("layers.0.self_attn.compressor.kv_norm.weight", "model.layers.0.attn.mla_attn.compressor.norm.weight"),
    ("layers.0.self_attn.compressor.position_bias", "model.layers.0.attn.mla_attn.compressor.ape"),
    # Indexer own params
    ("layers.10.self_attn.compressor.indexer.q_b_proj.weight", "model.layers.10.attn.mla_attn.indexer.wq_b.weight"),
    ("layers.10.self_attn.compressor.indexer.q_b_proj.input_scale", "model.layers.10.attn.mla_attn.indexer.wq_b.input_scale"),
    ("layers.10.self_attn.compressor.indexer.weights_proj.weight", "model.layers.10.attn.mla_attn.indexer.weights_proj.weight"),
    ("layers.10.self_attn.compressor.indexer.weights_proj.input_scale", "model.layers.10.attn.mla_attn.indexer.weights_proj.input_scale"),
    ("layers.10.self_attn.compressor.indexer.kv_norm.weight", "model.layers.10.attn.mla_attn.indexer.k_norm.weight"),
    # Indexer's compressor
    ("layers.10.self_attn.compressor.indexer.kv_proj.weight", "model.layers.10.attn.mla_attn.indexer.compressor.wkv.weight"),
    ("layers.10.self_attn.compressor.indexer.kv_proj.input_scale", "model.layers.10.attn.mla_attn.indexer.compressor.wkv.input_scale"),
    ("layers.10.self_attn.compressor.indexer.gate_proj.weight", "model.layers.10.attn.mla_attn.indexer.compressor.wgate.weight"),
    ("layers.10.self_attn.compressor.indexer.gate_proj.input_scale", "model.layers.10.attn.mla_attn.indexer.compressor.wgate.input_scale"),
    ("layers.10.self_attn.compressor.indexer.position_bias", "model.layers.10.attn.mla_attn.indexer.compressor.ape"),
    # MoE gate
    ("layers.0.mlp.gate.tid2eid", "model.layers.0.ffn.gate.tid2eid"),
    ("layers.0.mlp.gate.weight", "model.layers.0.ffn.gate.weight"),
    # Expert weights — gate_proj → w1, up_proj → w3, down_proj → w2
    ("layers.0.mlp.experts.0.gate_proj.weight", "model.layers.0.ffn.experts.0.w1.weight"),
    ("layers.0.mlp.experts.0.up_proj.weight", "model.layers.0.ffn.experts.0.w3.weight"),
    ("layers.0.mlp.experts.0.down_proj.weight", "model.layers.0.ffn.experts.0.w2.weight"),
    ("layers.0.mlp.experts.0.gate_proj.input_scale", "model.layers.0.ffn.experts.0.w1.input_scale"),
    ("layers.0.mlp.experts.0.gate_proj.weight_scale", "model.layers.0.ffn.experts.0.w1.weight_scale"),
    ("layers.0.mlp.experts.0.gate_proj.weight_scale_2", "model.layers.0.ffn.experts.0.w1.weight_scale_2"),
    # Shared experts (down_proj is expected to keep its name here)
    ("layers.0.mlp.shared_experts.gate_proj.weight", "model.layers.0.ffn.shared_experts.w1.weight"),
    ("layers.0.mlp.shared_experts.up_proj.weight", "model.layers.0.ffn.shared_experts.w3.weight"),
    ("layers.0.mlp.shared_experts.down_proj.weight", "model.layers.0.ffn.shared_experts.down_proj.weight"),
    ("layers.0.mlp.shared_experts.gate_proj.input_scale", "model.layers.0.ffn.shared_experts.w1.input_scale"),
    ("layers.0.mlp.shared_experts.down_proj.weight_scale", "model.layers.0.ffn.shared_experts.down_proj.weight_scale"),
    # Layer norm — only the leading "layers." prefix is expected to change
    ("layers.0.post_attention_layernorm.weight", "model.layers.0.post_attention_layernorm.weight"),
]
def main():
    """Run every TEST_CASES pair through the mapper and report the outcome.

    Each mismatch is printed as it is found, followed by a final
    passed/failed summary line.

    Returns:
        0 when every case matches, 1 otherwise (used as the exit status).
    """
    name_mapper = _make_deepseek_v4_nvfp4_weights_mapper()
    ok_count = 0
    bad_count = 0
    for source_key, wanted in TEST_CASES:
        actual = name_mapper.map_name(source_key)
        if actual == wanted:
            ok_count += 1
            continue
        # Mismatch: report immediately so output order follows TEST_CASES.
        bad_count += 1
        print(f"FAIL: {source_key}")
        print(f" expected: {wanted}")
        print(f" got: {actual}")
    print(f"\n{ok_count} passed, {bad_count} failed")
    return 1 if bad_count else 0


if __name__ == "__main__":
    # Propagate the pass/fail result as the process exit status.
    sys.exit(main())