File size: 928 Bytes
de1867f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
{
  "n_features": 25000,
  "d_model": 768,
  "lr_exp": -10,
  "disable_comet": false,
  "per_neuron_reinit_interval": 0,
  "reservoir_time_discount": 0.995,
  "reinit_interval": 800,
  "max_reinit_neurons": 5000,
  "reservoir_size": 5000,
  "n_piles": 292,
  "log_interval": 200,
  "reinit_input_norm": "target_scaled",
  "reinit_input": "error",
  "reinit_norm_alpha": 0.3,
  "data_loc": "attn_data",
  "reinit_threshold": -6,
  "scheduler": "wsd",
  "layer_idx": 3,
  "l1_exp": -5,
  "neuron_reinit_percent": 0.85,
  "beta1": 1,
  "beta2": 4,
  "reinit_target": "error",
  "sparse_adam": false,
  "run_template": "A{layer_idx}_S{l1_exp}_R{l1_ratio}_P{l1_p}",
  "project_name": "attn_test",
  "decoder_bias": true,
  "l1_beta": 0.99,
  "alt_sparsity_loss": "log",
  "l1_ratio": 1,
  "l1_p": 0,
  "optimizer": "sparse_adam",
  "model_type": "attn_out",
  "adam_beta1": 0.5,
  "adam_beta2": 0.9375,
  "run_name": "A3_S-5_R1_P0"
}