VISOR-GPT/train/models/gpt2/xlarge_config.json
{
  "emb_size": 1600,
  "feedforward_size": 6400,
  "hidden_size": 1600,
  "hidden_act": "gelu_fast",
  "heads_num": 25,
  "layers_num": 48,
  "max_seq_length": 1024,
  "dropout": 0.1,
  "data_processor": "lm",
  "embedding": ["word", "pos"],
  "remove_embedding_layernorm": true,
  "encoder": "transformer",
  "mask": "causal",
  "layernorm_positioning": "pre",
  "target": ["lm"],
  "tie_weights": true
}
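These hyperparameters match GPT-2 XL (48 layers, 1600-dim hidden states, 25 attention heads, 1024-token context, pre-layernorm, tied input/output embeddings), and the key names appear to follow the TencentPretrain/UER-py config schema. A minimal stdlib-only sketch for loading and sanity-checking the file; the relative path is an assumption based on the repo layout above:

```python
import json

# Load the GPT-2 XL config (path assumed relative to the VISOR-GPT repo root).
with open("train/models/gpt2/xlarge_config.json") as f:
    cfg = json.load(f)

# Sanity-check the transformer dimensions.
assert cfg["emb_size"] == cfg["hidden_size"]        # embeddings feed the residual stream directly
assert cfg["hidden_size"] % cfg["heads_num"] == 0   # hidden size must split evenly across heads
head_dim = cfg["hidden_size"] // cfg["heads_num"]   # 1600 / 25 = 64, the usual per-head width
assert cfg["feedforward_size"] == 4 * cfg["hidden_size"]  # standard 4x FFN expansion

print(f"layers={cfg['layers_num']}, hidden={cfg['hidden_size']}, "
      f"heads={cfg['heads_num']}, head_dim={head_dim}, "
      f"context={cfg['max_seq_length']}")
```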