{
"n_features": 25000,
"d_model": 768,
"lr_exp": -10,
"disable_comet": false,
"per_neuron_reinit_interval": 0,
"reservoir_time_discount": 0.995,
"reinit_interval": 800,
"max_reinit_neurons": 5000,
"reservoir_size": 5000,
"n_piles": 292,
"log_interval": 200,
"reinit_input_norm": "target_scaled",
"reinit_input": "error",
"reinit_norm_alpha": 0.3,
"data_loc": "attn_data",
"reinit_threshold": -6,
"scheduler": "wsd",
"layer_idx": 1,
"l1_exp": -1,
"neuron_reinit_percent": 0.85,
"beta1": 1,
"beta2": 4,
"reinit_target": "error",
"sparse_adam": false,
"run_template": "A{layer_idx}_S{l1_exp}_R{l1_ratio}_P{l1_p}",
"project_name": "attn_test",
"decoder_bias": true,
"l1_beta": 0.99,
"alt_sparsity_loss": "log",
"l1_ratio": 1,
"l1_p": 0,
"optimizer": "sparse_adam",
"model_type": "attn_out",
"adam_beta1": 0.5,
"adam_beta2": 0.9375,
"run_name": "A1_S-1_R1_P0"
}