- reference/vllm/tokenizers/ — official DSV4 tokenizer + encoding (read-only)
- reference/vllm/reasoning/ — thinking-mode parsers (DeepSeekR1 style)
- reference/vllm/tool_parsers/ — DSML tool-call parsers (V3.2 base, V4 variant)
- reference/official_inference/ — the original weights' generate.py, model.py, kernel.py
- reference/README.md documents the layout and which files matter for our pipeline
- These are read-only references for cross-checking; they are not imported by production code
35 lines
1.0 KiB
JSON
35 lines
1.0 KiB
JSON
{
  "vocab_size": 129280,
  "dim": 7168,
  "moe_inter_dim": 3072,
  "n_layers": 61,
  "n_hash_layers": 3,
  "n_heads": 128,
  "n_routed_experts": 384,
  "n_shared_experts": 1,
  "n_activated_experts": 6,
  "score_func": "sqrtsoftplus",
  "route_scale": 2.5,
  "swiglu_limit": 10.0,
  "q_lora_rank": 1536,
  "head_dim": 512,
  "rope_head_dim": 64,
  "o_groups": 16,
  "o_lora_rank": 1024,
  "window_size": 128,
  "original_seq_len": 65536,
  "rope_theta": 10000,
  "rope_factor": 16,
  "beta_fast": 32,
  "beta_slow": 1,
  "index_n_heads": 64,
  "index_head_dim": 128,
  "index_topk": 1024,
  "hc_mult": 4,
  "hc_sinkhorn_iters": 20,
  "dtype": "fp8",
  "scale_fmt": "ue8m0",
  "expert_dtype": "fp4",
  "compress_rope_theta": 160000,
  "compress_ratios": [128, 128, 4, 128, 4, 128, 4, 128, 4, 128, 4, 128, 4, 128, 4, 128, 4, 128, 4, 128, 4, 128, 4, 128, 4, 128, 4, 128, 4, 128, 4, 128, 4, 128, 4, 128, 4, 128, 4, 128, 4, 128, 4, 128, 4, 128, 4, 128, 4, 128, 4, 128, 4, 128, 4, 128, 4, 128, 4, 128, 4, 0]
}