VISOR-GPT/train/models/llama/7b_config.json
szukevin's picture
upload
7900c16
raw
history blame
493 Bytes
{
  "emb_size": 4096,
  "feedforward_size": 11008,
  "hidden_size": 4096,
  "hidden_act": "silu",
  "heads_num": 32,
  "layers_num": 32,
  "dropout": 0.1,
  "data_processor": "lm",
  "max_seq_length": 2048,
  "embedding": ["word"],
  "remove_transformer_bias": true,
  "remove_embedding_layernorm": true,
  "rotary_position_embedding": true,
  "encoder": "transformer",
  "feed_forward": "gated",
  "mask": "causal",
  "layernorm_positioning": "pre",
  "layernorm": "rms",
  "target": ["lm"]
}