{
  "emb_size": 1280,
  "feedforward_size": 5120,
  "hidden_size": 1280,
  "hidden_act": "gelu_fast",
  "heads_num": 20,
  "layers_num": 36,
  "max_seq_length": 1024,
  "dropout": 0.1,
  "data_processor": "lm",
  "embedding": [
    "word",
    "pos"
  ],
  "remove_embedding_layernorm": true,
  "encoder": "transformer",
  "mask": "causal",
  "layernorm_positioning": "pre",
  "target": [
    "lm"
  ],
  "tie_weights": true
}