deepseekv3 / inference /configs /config_16B.json
Gokuldaskumar's picture
Duplicate from deepseek-ai/DeepSeek-V3
c3a00f0 verified
raw
history blame contribute delete
417 Bytes
{
"vocab_size": 102400,
"dim": 2048,
"inter_dim": 10944,
"moe_inter_dim": 1408,
"n_layers": 27,
"n_dense_layers": 1,
"n_heads": 16,
"n_routed_experts": 64,
"n_shared_experts": 2,
"n_activated_experts": 6,
"route_scale": 1.0,
"q_lora_rank": 0,
"kv_lora_rank": 512,
"qk_nope_head_dim": 128,
"qk_rope_head_dim": 64,
"v_head_dim": 128,
"mscale": 0.707
}