VISOR-GPT / train /models /t5-v1_1 /xlarge_config.json
szukevin's picture
upload
7900c16
raw
history blame
654 Bytes
{
  "emb_size": 2048,
  "feedforward_size": 5120,
  "hidden_size": 2048,
  "hidden_act": "gelu_fast",
  "attention_head_size": 64,
  "heads_num": 32,
  "layers_num": 24,
  "decoder_layers_num": 24,
  "dropout": 0.0,
  "data_processor": "t5",
  "embedding": ["word"],
  "relative_position_embedding": true,
  "remove_embedding_layernorm": true,
  "tgt_embedding": ["word"],
  "share_embedding": true,
  "encoder": "transformer",
  "mask": "fully_visible",
  "layernorm_positioning": "pre",
  "feed_forward": "gated",
  "remove_attention_scale": true,
  "layernorm": "t5",
  "remove_transformer_bias": true,
  "decoder": "transformer",
  "target": ["lm"]
}