File size: 928 Bytes
de1867f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 |
{
"n_features": 25000,
"d_model": 768,
"lr_exp": -10,
"disable_comet": false,
"per_neuron_reinit_interval": 0,
"reservoir_time_discount": 0.995,
"reinit_interval": 800,
"max_reinit_neurons": 5000,
"reservoir_size": 5000,
"n_piles": 292,
"log_interval": 200,
"reinit_input_norm": "target_scaled",
"reinit_input": "error",
"reinit_norm_alpha": 0.3,
"data_loc": "attn_data",
"reinit_threshold": -6,
"scheduler": "wsd",
"layer_idx": 3,
"l1_exp": -5,
"neuron_reinit_percent": 0.85,
"beta1": 1,
"beta2": 4,
"reinit_target": "error",
"sparse_adam": false,
"run_template": "A{layer_idx}_S{l1_exp}_R{l1_ratio}_P{l1_p}",
"project_name": "attn_test",
"decoder_bias": true,
"l1_beta": 0.99,
"alt_sparsity_loss": "log",
"l1_ratio": 1,
"l1_p": 0,
"optimizer": "sparse_adam",
"model_type": "attn_out",
"adam_beta1": 0.5,
"adam_beta2": 0.9375,
"run_name": "A3_S-5_R1_P0"
} |