noanabeshima commited on
Commit
e95dd23
1 Parent(s): b3d6158

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. mlp_map_test/M0_S-2_R16_P2.pt +3 -0
  2. mlp_map_test/M0_S-2_R16_P2_config.json +38 -0
  3. mlp_map_test/M0_S-2_R8_P2.pt +3 -0
  4. mlp_map_test/M0_S-2_R8_P2_config.json +38 -0
  5. mlp_map_test/M0_S-2_R8_P4.pt +3 -0
  6. mlp_map_test/M0_S-2_R8_P4_config.json +38 -0
  7. mlp_map_test/M0_S-4_R16_P6.pt +3 -0
  8. mlp_map_test/M0_S-4_R16_P6_config.json +38 -0
  9. mlp_map_test/M0_S-4_R4_P2.pt +3 -0
  10. mlp_map_test/M0_S-4_R4_P2_config.json +38 -0
  11. mlp_map_test/M0_S-4_R8_P2.pt +3 -0
  12. mlp_map_test/M0_S-4_R8_P2_config.json +38 -0
  13. mlp_map_test/M0_S-6_R16_P6.pt +3 -0
  14. mlp_map_test/M0_S-6_R16_P6_config.json +38 -0
  15. mlp_map_test/M0_S-6_R2_P2.pt +3 -0
  16. mlp_map_test/M0_S-6_R2_P2_config.json +38 -0
  17. mlp_map_test/M0_S-6_R4_P2.pt +3 -0
  18. mlp_map_test/M0_S-6_R4_P2_config.json +38 -0
  19. mlp_map_test/M0_S-6_R4_P5.pt +3 -0
  20. mlp_map_test/M0_S-6_R4_P5_config.json +38 -0
  21. mlp_map_test/M0_S-6_R8_P3.pt +3 -0
  22. mlp_map_test/M0_S-6_R8_P3_config.json +38 -0
  23. mlp_map_test/M0_S-6_R8_P4.pt +3 -0
  24. mlp_map_test/M0_S-6_R8_P4_config.json +38 -0
  25. mlp_map_test/M0_S-8_R4_P1.pt +3 -0
  26. mlp_map_test/M0_S-8_R4_P1_config.json +38 -0
  27. mlp_map_test/M0_S-8_R4_P2.pt +3 -0
  28. mlp_map_test/M0_S-8_R4_P2_config.json +38 -0
  29. mlp_map_test/M0_S-8_R8_P1.pt +3 -0
  30. mlp_map_test/M0_S-8_R8_P1_config.json +38 -0
  31. mlp_map_test/M0_S0_R16_P5.pt +3 -0
  32. mlp_map_test/M0_S0_R16_P5_config.json +38 -0
  33. mlp_map_test/M0_S0_R4_P2.pt +3 -0
  34. mlp_map_test/M0_S0_R4_P2_config.json +38 -0
  35. mlp_map_test/M0_S0_R4_P6.pt +3 -0
  36. mlp_map_test/M0_S0_R4_P6_config.json +38 -0
  37. mlp_map_test/M0_S0_R8_P4.pt +3 -0
  38. mlp_map_test/M0_S0_R8_P4_config.json +38 -0
  39. mlp_map_test/M1_S-2_R16_P2.pt +3 -0
  40. mlp_map_test/M1_S-2_R16_P2_config.json +38 -0
  41. mlp_map_test/M1_S-2_R2_P3.pt +3 -0
  42. mlp_map_test/M1_S-2_R2_P3_config.json +38 -0
  43. mlp_map_test/M1_S-4_R16_P1.pt +3 -0
  44. mlp_map_test/M1_S-4_R16_P1_config.json +38 -0
  45. mlp_map_test/M1_S-4_R2_P6.pt +3 -0
  46. mlp_map_test/M1_S-4_R2_P6_config.json +38 -0
  47. mlp_map_test/M1_S-4_R4_P3.pt +3 -0
  48. mlp_map_test/M1_S-4_R4_P3_config.json +38 -0
  49. mlp_map_test/M1_S-4_R8_P2.pt +3 -0
  50. mlp_map_test/M1_S-4_R8_P2_config.json +38 -0
mlp_map_test/M0_S-2_R16_P2.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9359d6761c0678872c24614fdc2e33a95859ed8893721a009e4efb7312c1a6ab
3
+ size 153705088
mlp_map_test/M0_S-2_R16_P2_config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "n_features": 25000,
3
+ "d_model": 768,
4
+ "lr_exp": -10,
5
+ "disable_comet": false,
6
+ "per_neuron_reinit_interval": 0,
7
+ "reservoir_time_discount": 0.995,
8
+ "reinit_interval": 800,
9
+ "max_reinit_neurons": 5000,
10
+ "reservoir_size": 5000,
11
+ "n_piles": 292,
12
+ "log_interval": 200,
13
+ "reinit_input_norm": "target_scaled",
14
+ "reinit_input": "x",
15
+ "reinit_norm_alpha": 0.3,
16
+ "data_loc": "mlp_data",
17
+ "reinit_threshold": -6,
18
+ "scheduler": "wsd",
19
+ "layer_idx": 0,
20
+ "l1_exp": -2,
21
+ "neuron_reinit_percent": 0.85,
22
+ "beta1": 1,
23
+ "beta2": 4,
24
+ "reinit_target": "y",
25
+ "sparse_adam": false,
26
+ "run_template": "M{layer_idx}_S{l1_exp}_R{l1_ratio}_P{l1_p}",
27
+ "project_name": "mlp_map_test",
28
+ "decoder_bias": true,
29
+ "l1_beta": 0.99,
30
+ "alt_sparsity_loss": "log",
31
+ "l1_ratio": 16,
32
+ "l1_p": 2,
33
+ "optimizer": "sparse_adam",
34
+ "model_type": "mlp_map",
35
+ "adam_beta1": 0.5,
36
+ "adam_beta2": 0.9375,
37
+ "run_name": "M0_S-2_R16_P2"
38
+ }
mlp_map_test/M0_S-2_R8_P2.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:72fe4a438c7cb4fd200d69aec08f833f29172203aa7422efcd88ff57df0b9249
3
+ size 153705080
mlp_map_test/M0_S-2_R8_P2_config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "n_features": 25000,
3
+ "d_model": 768,
4
+ "lr_exp": -10,
5
+ "disable_comet": false,
6
+ "per_neuron_reinit_interval": 0,
7
+ "reservoir_time_discount": 0.995,
8
+ "reinit_interval": 800,
9
+ "max_reinit_neurons": 5000,
10
+ "reservoir_size": 5000,
11
+ "n_piles": 292,
12
+ "log_interval": 200,
13
+ "reinit_input_norm": "target_scaled",
14
+ "reinit_input": "x",
15
+ "reinit_norm_alpha": 0.3,
16
+ "data_loc": "mlp_data",
17
+ "reinit_threshold": -6,
18
+ "scheduler": "wsd",
19
+ "layer_idx": 0,
20
+ "l1_exp": -2,
21
+ "neuron_reinit_percent": 0.85,
22
+ "beta1": 1,
23
+ "beta2": 4,
24
+ "reinit_target": "y",
25
+ "sparse_adam": false,
26
+ "run_template": "M{layer_idx}_S{l1_exp}_R{l1_ratio}_P{l1_p}",
27
+ "project_name": "mlp_map_test",
28
+ "decoder_bias": true,
29
+ "l1_beta": 0.99,
30
+ "alt_sparsity_loss": "log",
31
+ "l1_ratio": 8,
32
+ "l1_p": 2,
33
+ "optimizer": "sparse_adam",
34
+ "model_type": "mlp_map",
35
+ "adam_beta1": 0.5,
36
+ "adam_beta2": 0.9375,
37
+ "run_name": "M0_S-2_R8_P2"
38
+ }
mlp_map_test/M0_S-2_R8_P4.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1006aa4821d67c80e35c63ada50d443ae7e342f4f8afda3490b91d734e26125a
3
+ size 153705080
mlp_map_test/M0_S-2_R8_P4_config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "n_features": 25000,
3
+ "d_model": 768,
4
+ "lr_exp": -10,
5
+ "disable_comet": false,
6
+ "per_neuron_reinit_interval": 0,
7
+ "reservoir_time_discount": 0.995,
8
+ "reinit_interval": 800,
9
+ "max_reinit_neurons": 5000,
10
+ "reservoir_size": 5000,
11
+ "n_piles": 292,
12
+ "log_interval": 200,
13
+ "reinit_input_norm": "target_scaled",
14
+ "reinit_input": "x",
15
+ "reinit_norm_alpha": 0.3,
16
+ "data_loc": "mlp_data",
17
+ "reinit_threshold": -6,
18
+ "scheduler": "wsd",
19
+ "layer_idx": 0,
20
+ "l1_exp": -2,
21
+ "neuron_reinit_percent": 0.85,
22
+ "beta1": 1,
23
+ "beta2": 4,
24
+ "reinit_target": "y",
25
+ "sparse_adam": false,
26
+ "run_template": "M{layer_idx}_S{l1_exp}_R{l1_ratio}_P{l1_p}",
27
+ "project_name": "mlp_map_test",
28
+ "decoder_bias": true,
29
+ "l1_beta": 0.99,
30
+ "alt_sparsity_loss": "log",
31
+ "l1_ratio": 8,
32
+ "l1_p": 4,
33
+ "optimizer": "sparse_adam",
34
+ "model_type": "mlp_map",
35
+ "adam_beta1": 0.5,
36
+ "adam_beta2": 0.9375,
37
+ "run_name": "M0_S-2_R8_P4"
38
+ }
mlp_map_test/M0_S-4_R16_P6.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c15de0bd8db6ce5aac80986803982acbdb2eaefcce9cd64978ced7511fa26595
3
+ size 153705088
mlp_map_test/M0_S-4_R16_P6_config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "n_features": 25000,
3
+ "d_model": 768,
4
+ "lr_exp": -10,
5
+ "disable_comet": false,
6
+ "per_neuron_reinit_interval": 0,
7
+ "reservoir_time_discount": 0.995,
8
+ "reinit_interval": 800,
9
+ "max_reinit_neurons": 5000,
10
+ "reservoir_size": 5000,
11
+ "n_piles": 292,
12
+ "log_interval": 200,
13
+ "reinit_input_norm": "target_scaled",
14
+ "reinit_input": "x",
15
+ "reinit_norm_alpha": 0.3,
16
+ "data_loc": "mlp_data",
17
+ "reinit_threshold": -6,
18
+ "scheduler": "wsd",
19
+ "layer_idx": 0,
20
+ "l1_exp": -4,
21
+ "neuron_reinit_percent": 0.85,
22
+ "beta1": 1,
23
+ "beta2": 4,
24
+ "reinit_target": "y",
25
+ "sparse_adam": false,
26
+ "run_template": "M{layer_idx}_S{l1_exp}_R{l1_ratio}_P{l1_p}",
27
+ "project_name": "mlp_map_test",
28
+ "decoder_bias": true,
29
+ "l1_beta": 0.99,
30
+ "alt_sparsity_loss": "log",
31
+ "l1_ratio": 16,
32
+ "l1_p": 6,
33
+ "optimizer": "sparse_adam",
34
+ "model_type": "mlp_map",
35
+ "adam_beta1": 0.5,
36
+ "adam_beta2": 0.9375,
37
+ "run_name": "M0_S-4_R16_P6"
38
+ }
mlp_map_test/M0_S-4_R4_P2.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f48785465f336071949bcff0e1d1f4b3ca656ea66f6270a4d64c5075d0db263c
3
+ size 153705080
mlp_map_test/M0_S-4_R4_P2_config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "n_features": 25000,
3
+ "d_model": 768,
4
+ "lr_exp": -10,
5
+ "disable_comet": false,
6
+ "per_neuron_reinit_interval": 0,
7
+ "reservoir_time_discount": 0.995,
8
+ "reinit_interval": 800,
9
+ "max_reinit_neurons": 5000,
10
+ "reservoir_size": 5000,
11
+ "n_piles": 292,
12
+ "log_interval": 200,
13
+ "reinit_input_norm": "target_scaled",
14
+ "reinit_input": "x",
15
+ "reinit_norm_alpha": 0.3,
16
+ "data_loc": "mlp_data",
17
+ "reinit_threshold": -6,
18
+ "scheduler": "wsd",
19
+ "layer_idx": 0,
20
+ "l1_exp": -4,
21
+ "neuron_reinit_percent": 0.85,
22
+ "beta1": 1,
23
+ "beta2": 4,
24
+ "reinit_target": "y",
25
+ "sparse_adam": false,
26
+ "run_template": "M{layer_idx}_S{l1_exp}_R{l1_ratio}_P{l1_p}",
27
+ "project_name": "mlp_map_test",
28
+ "decoder_bias": true,
29
+ "l1_beta": 0.99,
30
+ "alt_sparsity_loss": "log",
31
+ "l1_ratio": 4,
32
+ "l1_p": 2,
33
+ "optimizer": "sparse_adam",
34
+ "model_type": "mlp_map",
35
+ "adam_beta1": 0.5,
36
+ "adam_beta2": 0.9375,
37
+ "run_name": "M0_S-4_R4_P2"
38
+ }
mlp_map_test/M0_S-4_R8_P2.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:49b5ebb8036e34338f3433ff2c2c7d6032ff76c4bb87b9b8dc17ed7203b6eba1
3
+ size 153705080
mlp_map_test/M0_S-4_R8_P2_config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "n_features": 25000,
3
+ "d_model": 768,
4
+ "lr_exp": -10,
5
+ "disable_comet": false,
6
+ "per_neuron_reinit_interval": 0,
7
+ "reservoir_time_discount": 0.995,
8
+ "reinit_interval": 800,
9
+ "max_reinit_neurons": 5000,
10
+ "reservoir_size": 5000,
11
+ "n_piles": 292,
12
+ "log_interval": 200,
13
+ "reinit_input_norm": "target_scaled",
14
+ "reinit_input": "x",
15
+ "reinit_norm_alpha": 0.3,
16
+ "data_loc": "mlp_data",
17
+ "reinit_threshold": -6,
18
+ "scheduler": "wsd",
19
+ "layer_idx": 0,
20
+ "l1_exp": -4,
21
+ "neuron_reinit_percent": 0.85,
22
+ "beta1": 1,
23
+ "beta2": 4,
24
+ "reinit_target": "y",
25
+ "sparse_adam": false,
26
+ "run_template": "M{layer_idx}_S{l1_exp}_R{l1_ratio}_P{l1_p}",
27
+ "project_name": "mlp_map_test",
28
+ "decoder_bias": true,
29
+ "l1_beta": 0.99,
30
+ "alt_sparsity_loss": "log",
31
+ "l1_ratio": 8,
32
+ "l1_p": 2,
33
+ "optimizer": "sparse_adam",
34
+ "model_type": "mlp_map",
35
+ "adam_beta1": 0.5,
36
+ "adam_beta2": 0.9375,
37
+ "run_name": "M0_S-4_R8_P2"
38
+ }
mlp_map_test/M0_S-6_R16_P6.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:81e71c6a3ed2625c96875a73a66234e685f89b140852e496bf28a17d1f412a6a
3
+ size 153705088
mlp_map_test/M0_S-6_R16_P6_config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "n_features": 25000,
3
+ "d_model": 768,
4
+ "lr_exp": -10,
5
+ "disable_comet": false,
6
+ "per_neuron_reinit_interval": 0,
7
+ "reservoir_time_discount": 0.995,
8
+ "reinit_interval": 800,
9
+ "max_reinit_neurons": 5000,
10
+ "reservoir_size": 5000,
11
+ "n_piles": 292,
12
+ "log_interval": 200,
13
+ "reinit_input_norm": "target_scaled",
14
+ "reinit_input": "x",
15
+ "reinit_norm_alpha": 0.3,
16
+ "data_loc": "mlp_data",
17
+ "reinit_threshold": -6,
18
+ "scheduler": "wsd",
19
+ "layer_idx": 0,
20
+ "l1_exp": -6,
21
+ "neuron_reinit_percent": 0.85,
22
+ "beta1": 1,
23
+ "beta2": 4,
24
+ "reinit_target": "y",
25
+ "sparse_adam": false,
26
+ "run_template": "M{layer_idx}_S{l1_exp}_R{l1_ratio}_P{l1_p}",
27
+ "project_name": "mlp_map_test",
28
+ "decoder_bias": true,
29
+ "l1_beta": 0.99,
30
+ "alt_sparsity_loss": "log",
31
+ "l1_ratio": 16,
32
+ "l1_p": 6,
33
+ "optimizer": "sparse_adam",
34
+ "model_type": "mlp_map",
35
+ "adam_beta1": 0.5,
36
+ "adam_beta2": 0.9375,
37
+ "run_name": "M0_S-6_R16_P6"
38
+ }
mlp_map_test/M0_S-6_R2_P2.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ac77f117cfc79c30cb2f10b415fee609eb8513a8b4fe301f4249743b7fb0cc5b
3
+ size 153705080
mlp_map_test/M0_S-6_R2_P2_config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "n_features": 25000,
3
+ "d_model": 768,
4
+ "lr_exp": -10,
5
+ "disable_comet": false,
6
+ "per_neuron_reinit_interval": 0,
7
+ "reservoir_time_discount": 0.995,
8
+ "reinit_interval": 800,
9
+ "max_reinit_neurons": 5000,
10
+ "reservoir_size": 5000,
11
+ "n_piles": 292,
12
+ "log_interval": 200,
13
+ "reinit_input_norm": "target_scaled",
14
+ "reinit_input": "x",
15
+ "reinit_norm_alpha": 0.3,
16
+ "data_loc": "mlp_data",
17
+ "reinit_threshold": -6,
18
+ "scheduler": "wsd",
19
+ "layer_idx": 0,
20
+ "l1_exp": -6,
21
+ "neuron_reinit_percent": 0.85,
22
+ "beta1": 1,
23
+ "beta2": 4,
24
+ "reinit_target": "y",
25
+ "sparse_adam": false,
26
+ "run_template": "M{layer_idx}_S{l1_exp}_R{l1_ratio}_P{l1_p}",
27
+ "project_name": "mlp_map_test",
28
+ "decoder_bias": true,
29
+ "l1_beta": 0.99,
30
+ "alt_sparsity_loss": "log",
31
+ "l1_ratio": 2,
32
+ "l1_p": 2,
33
+ "optimizer": "sparse_adam",
34
+ "model_type": "mlp_map",
35
+ "adam_beta1": 0.5,
36
+ "adam_beta2": 0.9375,
37
+ "run_name": "M0_S-6_R2_P2"
38
+ }
mlp_map_test/M0_S-6_R4_P2.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:64d41efbd610f4e13f9131487d9125f2aeaf128541d04b2e8e4fc4daa71f0388
3
+ size 153705080
mlp_map_test/M0_S-6_R4_P2_config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "n_features": 25000,
3
+ "d_model": 768,
4
+ "lr_exp": -10,
5
+ "disable_comet": false,
6
+ "per_neuron_reinit_interval": 0,
7
+ "reservoir_time_discount": 0.995,
8
+ "reinit_interval": 800,
9
+ "max_reinit_neurons": 5000,
10
+ "reservoir_size": 5000,
11
+ "n_piles": 292,
12
+ "log_interval": 200,
13
+ "reinit_input_norm": "target_scaled",
14
+ "reinit_input": "x",
15
+ "reinit_norm_alpha": 0.3,
16
+ "data_loc": "mlp_data",
17
+ "reinit_threshold": -6,
18
+ "scheduler": "wsd",
19
+ "layer_idx": 0,
20
+ "l1_exp": -6,
21
+ "neuron_reinit_percent": 0.85,
22
+ "beta1": 1,
23
+ "beta2": 4,
24
+ "reinit_target": "y",
25
+ "sparse_adam": false,
26
+ "run_template": "M{layer_idx}_S{l1_exp}_R{l1_ratio}_P{l1_p}",
27
+ "project_name": "mlp_map_test",
28
+ "decoder_bias": true,
29
+ "l1_beta": 0.99,
30
+ "alt_sparsity_loss": "log",
31
+ "l1_ratio": 4,
32
+ "l1_p": 2,
33
+ "optimizer": "sparse_adam",
34
+ "model_type": "mlp_map",
35
+ "adam_beta1": 0.5,
36
+ "adam_beta2": 0.9375,
37
+ "run_name": "M0_S-6_R4_P2"
38
+ }
mlp_map_test/M0_S-6_R4_P5.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d887b70c10c59158d64b4a325077f3da2ff37b6f859823460862440c6f543f53
3
+ size 153705080
mlp_map_test/M0_S-6_R4_P5_config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "n_features": 25000,
3
+ "d_model": 768,
4
+ "lr_exp": -10,
5
+ "disable_comet": false,
6
+ "per_neuron_reinit_interval": 0,
7
+ "reservoir_time_discount": 0.995,
8
+ "reinit_interval": 800,
9
+ "max_reinit_neurons": 5000,
10
+ "reservoir_size": 5000,
11
+ "n_piles": 292,
12
+ "log_interval": 200,
13
+ "reinit_input_norm": "target_scaled",
14
+ "reinit_input": "x",
15
+ "reinit_norm_alpha": 0.3,
16
+ "data_loc": "mlp_data",
17
+ "reinit_threshold": -6,
18
+ "scheduler": "wsd",
19
+ "layer_idx": 0,
20
+ "l1_exp": -6,
21
+ "neuron_reinit_percent": 0.85,
22
+ "beta1": 1,
23
+ "beta2": 4,
24
+ "reinit_target": "y",
25
+ "sparse_adam": false,
26
+ "run_template": "M{layer_idx}_S{l1_exp}_R{l1_ratio}_P{l1_p}",
27
+ "project_name": "mlp_map_test",
28
+ "decoder_bias": true,
29
+ "l1_beta": 0.99,
30
+ "alt_sparsity_loss": "log",
31
+ "l1_ratio": 4,
32
+ "l1_p": 5,
33
+ "optimizer": "sparse_adam",
34
+ "model_type": "mlp_map",
35
+ "adam_beta1": 0.5,
36
+ "adam_beta2": 0.9375,
37
+ "run_name": "M0_S-6_R4_P5"
38
+ }
mlp_map_test/M0_S-6_R8_P3.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0a27cd0f5f150680d8dadcedb6f7ae34d350f24c9ac5be09c109e01ef7a56e4c
3
+ size 153705080
mlp_map_test/M0_S-6_R8_P3_config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "n_features": 25000,
3
+ "d_model": 768,
4
+ "lr_exp": -10,
5
+ "disable_comet": false,
6
+ "per_neuron_reinit_interval": 0,
7
+ "reservoir_time_discount": 0.995,
8
+ "reinit_interval": 800,
9
+ "max_reinit_neurons": 5000,
10
+ "reservoir_size": 5000,
11
+ "n_piles": 292,
12
+ "log_interval": 200,
13
+ "reinit_input_norm": "target_scaled",
14
+ "reinit_input": "x",
15
+ "reinit_norm_alpha": 0.3,
16
+ "data_loc": "mlp_data",
17
+ "reinit_threshold": -6,
18
+ "scheduler": "wsd",
19
+ "layer_idx": 0,
20
+ "l1_exp": -6,
21
+ "neuron_reinit_percent": 0.85,
22
+ "beta1": 1,
23
+ "beta2": 4,
24
+ "reinit_target": "y",
25
+ "sparse_adam": false,
26
+ "run_template": "M{layer_idx}_S{l1_exp}_R{l1_ratio}_P{l1_p}",
27
+ "project_name": "mlp_map_test",
28
+ "decoder_bias": true,
29
+ "l1_beta": 0.99,
30
+ "alt_sparsity_loss": "log",
31
+ "l1_ratio": 8,
32
+ "l1_p": 3,
33
+ "optimizer": "sparse_adam",
34
+ "model_type": "mlp_map",
35
+ "adam_beta1": 0.5,
36
+ "adam_beta2": 0.9375,
37
+ "run_name": "M0_S-6_R8_P3"
38
+ }
mlp_map_test/M0_S-6_R8_P4.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5599d0290d3bf8b4260f38e3ec921e462755391b1078fc56c0b0136bb5f43c24
3
+ size 153705080
mlp_map_test/M0_S-6_R8_P4_config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "n_features": 25000,
3
+ "d_model": 768,
4
+ "lr_exp": -10,
5
+ "disable_comet": false,
6
+ "per_neuron_reinit_interval": 0,
7
+ "reservoir_time_discount": 0.995,
8
+ "reinit_interval": 800,
9
+ "max_reinit_neurons": 5000,
10
+ "reservoir_size": 5000,
11
+ "n_piles": 292,
12
+ "log_interval": 200,
13
+ "reinit_input_norm": "target_scaled",
14
+ "reinit_input": "x",
15
+ "reinit_norm_alpha": 0.3,
16
+ "data_loc": "mlp_data",
17
+ "reinit_threshold": -6,
18
+ "scheduler": "wsd",
19
+ "layer_idx": 0,
20
+ "l1_exp": -6,
21
+ "neuron_reinit_percent": 0.85,
22
+ "beta1": 1,
23
+ "beta2": 4,
24
+ "reinit_target": "y",
25
+ "sparse_adam": false,
26
+ "run_template": "M{layer_idx}_S{l1_exp}_R{l1_ratio}_P{l1_p}",
27
+ "project_name": "mlp_map_test",
28
+ "decoder_bias": true,
29
+ "l1_beta": 0.99,
30
+ "alt_sparsity_loss": "log",
31
+ "l1_ratio": 8,
32
+ "l1_p": 4,
33
+ "optimizer": "sparse_adam",
34
+ "model_type": "mlp_map",
35
+ "adam_beta1": 0.5,
36
+ "adam_beta2": 0.9375,
37
+ "run_name": "M0_S-6_R8_P4"
38
+ }
mlp_map_test/M0_S-8_R4_P1.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b00c6230ca71c87b2d192db96dc3d211db347d242016d760f8f55f06b38ddb34
3
+ size 153705080
mlp_map_test/M0_S-8_R4_P1_config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "n_features": 25000,
3
+ "d_model": 768,
4
+ "lr_exp": -10,
5
+ "disable_comet": false,
6
+ "per_neuron_reinit_interval": 0,
7
+ "reservoir_time_discount": 0.995,
8
+ "reinit_interval": 800,
9
+ "max_reinit_neurons": 5000,
10
+ "reservoir_size": 5000,
11
+ "n_piles": 292,
12
+ "log_interval": 200,
13
+ "reinit_input_norm": "target_scaled",
14
+ "reinit_input": "x",
15
+ "reinit_norm_alpha": 0.3,
16
+ "data_loc": "mlp_data",
17
+ "reinit_threshold": -6,
18
+ "scheduler": "wsd",
19
+ "layer_idx": 0,
20
+ "l1_exp": -8,
21
+ "neuron_reinit_percent": 0.85,
22
+ "beta1": 1,
23
+ "beta2": 4,
24
+ "reinit_target": "y",
25
+ "sparse_adam": false,
26
+ "run_template": "M{layer_idx}_S{l1_exp}_R{l1_ratio}_P{l1_p}",
27
+ "project_name": "mlp_map_test",
28
+ "decoder_bias": true,
29
+ "l1_beta": 0.99,
30
+ "alt_sparsity_loss": "log",
31
+ "l1_ratio": 4,
32
+ "l1_p": 1,
33
+ "optimizer": "sparse_adam",
34
+ "model_type": "mlp_map",
35
+ "adam_beta1": 0.5,
36
+ "adam_beta2": 0.9375,
37
+ "run_name": "M0_S-8_R4_P1"
38
+ }
mlp_map_test/M0_S-8_R4_P2.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e396ed9e61013a12663e2e9a389ce34d902d049b5ff7fa82454a354b5e42ae99
3
+ size 153705080
mlp_map_test/M0_S-8_R4_P2_config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "n_features": 25000,
3
+ "d_model": 768,
4
+ "lr_exp": -10,
5
+ "disable_comet": false,
6
+ "per_neuron_reinit_interval": 0,
7
+ "reservoir_time_discount": 0.995,
8
+ "reinit_interval": 800,
9
+ "max_reinit_neurons": 5000,
10
+ "reservoir_size": 5000,
11
+ "n_piles": 292,
12
+ "log_interval": 200,
13
+ "reinit_input_norm": "target_scaled",
14
+ "reinit_input": "x",
15
+ "reinit_norm_alpha": 0.3,
16
+ "data_loc": "mlp_data",
17
+ "reinit_threshold": -6,
18
+ "scheduler": "wsd",
19
+ "layer_idx": 0,
20
+ "l1_exp": -8,
21
+ "neuron_reinit_percent": 0.85,
22
+ "beta1": 1,
23
+ "beta2": 4,
24
+ "reinit_target": "y",
25
+ "sparse_adam": false,
26
+ "run_template": "M{layer_idx}_S{l1_exp}_R{l1_ratio}_P{l1_p}",
27
+ "project_name": "mlp_map_test",
28
+ "decoder_bias": true,
29
+ "l1_beta": 0.99,
30
+ "alt_sparsity_loss": "log",
31
+ "l1_ratio": 4,
32
+ "l1_p": 2,
33
+ "optimizer": "sparse_adam",
34
+ "model_type": "mlp_map",
35
+ "adam_beta1": 0.5,
36
+ "adam_beta2": 0.9375,
37
+ "run_name": "M0_S-8_R4_P2"
38
+ }
mlp_map_test/M0_S-8_R8_P1.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c4817e4afa0c953637942fb2b90a6e6cf9ea6963f37b8bcd9f603eb7920e560c
3
+ size 153705080
mlp_map_test/M0_S-8_R8_P1_config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "n_features": 25000,
3
+ "d_model": 768,
4
+ "lr_exp": -10,
5
+ "disable_comet": false,
6
+ "per_neuron_reinit_interval": 0,
7
+ "reservoir_time_discount": 0.995,
8
+ "reinit_interval": 800,
9
+ "max_reinit_neurons": 5000,
10
+ "reservoir_size": 5000,
11
+ "n_piles": 292,
12
+ "log_interval": 200,
13
+ "reinit_input_norm": "target_scaled",
14
+ "reinit_input": "x",
15
+ "reinit_norm_alpha": 0.3,
16
+ "data_loc": "mlp_data",
17
+ "reinit_threshold": -6,
18
+ "scheduler": "wsd",
19
+ "layer_idx": 0,
20
+ "l1_exp": -8,
21
+ "neuron_reinit_percent": 0.85,
22
+ "beta1": 1,
23
+ "beta2": 4,
24
+ "reinit_target": "y",
25
+ "sparse_adam": false,
26
+ "run_template": "M{layer_idx}_S{l1_exp}_R{l1_ratio}_P{l1_p}",
27
+ "project_name": "mlp_map_test",
28
+ "decoder_bias": true,
29
+ "l1_beta": 0.99,
30
+ "alt_sparsity_loss": "log",
31
+ "l1_ratio": 8,
32
+ "l1_p": 1,
33
+ "optimizer": "sparse_adam",
34
+ "model_type": "mlp_map",
35
+ "adam_beta1": 0.5,
36
+ "adam_beta2": 0.9375,
37
+ "run_name": "M0_S-8_R8_P1"
38
+ }
mlp_map_test/M0_S0_R16_P5.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:45672861bca72a3dede8880454bfeb06b2d633bf93961ea9f8a0f59625493b27
3
+ size 153705080
mlp_map_test/M0_S0_R16_P5_config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "n_features": 25000,
3
+ "d_model": 768,
4
+ "lr_exp": -10,
5
+ "disable_comet": false,
6
+ "per_neuron_reinit_interval": 0,
7
+ "reservoir_time_discount": 0.995,
8
+ "reinit_interval": 800,
9
+ "max_reinit_neurons": 5000,
10
+ "reservoir_size": 5000,
11
+ "n_piles": 292,
12
+ "log_interval": 200,
13
+ "reinit_input_norm": "target_scaled",
14
+ "reinit_input": "x",
15
+ "reinit_norm_alpha": 0.3,
16
+ "data_loc": "mlp_data",
17
+ "reinit_threshold": -6,
18
+ "scheduler": "wsd",
19
+ "layer_idx": 0,
20
+ "l1_exp": 0,
21
+ "neuron_reinit_percent": 0.85,
22
+ "beta1": 1,
23
+ "beta2": 4,
24
+ "reinit_target": "y",
25
+ "sparse_adam": false,
26
+ "run_template": "M{layer_idx}_S{l1_exp}_R{l1_ratio}_P{l1_p}",
27
+ "project_name": "mlp_map_test",
28
+ "decoder_bias": true,
29
+ "l1_beta": 0.99,
30
+ "alt_sparsity_loss": "log",
31
+ "l1_ratio": 16,
32
+ "l1_p": 5,
33
+ "optimizer": "sparse_adam",
34
+ "model_type": "mlp_map",
35
+ "adam_beta1": 0.5,
36
+ "adam_beta2": 0.9375,
37
+ "run_name": "M0_S0_R16_P5"
38
+ }
mlp_map_test/M0_S0_R4_P2.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1a000f854a07edbaffba6e09296d3996f2d4f7e1368497b7cbe3b06debc21892
3
+ size 153705072
mlp_map_test/M0_S0_R4_P2_config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "n_features": 25000,
3
+ "d_model": 768,
4
+ "lr_exp": -10,
5
+ "disable_comet": false,
6
+ "per_neuron_reinit_interval": 0,
7
+ "reservoir_time_discount": 0.995,
8
+ "reinit_interval": 800,
9
+ "max_reinit_neurons": 5000,
10
+ "reservoir_size": 5000,
11
+ "n_piles": 292,
12
+ "log_interval": 200,
13
+ "reinit_input_norm": "target_scaled",
14
+ "reinit_input": "x",
15
+ "reinit_norm_alpha": 0.3,
16
+ "data_loc": "mlp_data",
17
+ "reinit_threshold": -6,
18
+ "scheduler": "wsd",
19
+ "layer_idx": 0,
20
+ "l1_exp": 0,
21
+ "neuron_reinit_percent": 0.85,
22
+ "beta1": 1,
23
+ "beta2": 4,
24
+ "reinit_target": "y",
25
+ "sparse_adam": false,
26
+ "run_template": "M{layer_idx}_S{l1_exp}_R{l1_ratio}_P{l1_p}",
27
+ "project_name": "mlp_map_test",
28
+ "decoder_bias": true,
29
+ "l1_beta": 0.99,
30
+ "alt_sparsity_loss": "log",
31
+ "l1_ratio": 4,
32
+ "l1_p": 2,
33
+ "optimizer": "sparse_adam",
34
+ "model_type": "mlp_map",
35
+ "adam_beta1": 0.5,
36
+ "adam_beta2": 0.9375,
37
+ "run_name": "M0_S0_R4_P2"
38
+ }
mlp_map_test/M0_S0_R4_P6.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bb98206eace11c3c691ef6773637c1a21f7d2b9e99907223b630dd4fdb06e142
3
+ size 153705072
mlp_map_test/M0_S0_R4_P6_config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "n_features": 25000,
3
+ "d_model": 768,
4
+ "lr_exp": -10,
5
+ "disable_comet": false,
6
+ "per_neuron_reinit_interval": 0,
7
+ "reservoir_time_discount": 0.995,
8
+ "reinit_interval": 800,
9
+ "max_reinit_neurons": 5000,
10
+ "reservoir_size": 5000,
11
+ "n_piles": 292,
12
+ "log_interval": 200,
13
+ "reinit_input_norm": "target_scaled",
14
+ "reinit_input": "x",
15
+ "reinit_norm_alpha": 0.3,
16
+ "data_loc": "mlp_data",
17
+ "reinit_threshold": -6,
18
+ "scheduler": "wsd",
19
+ "layer_idx": 0,
20
+ "l1_exp": 0,
21
+ "neuron_reinit_percent": 0.85,
22
+ "beta1": 1,
23
+ "beta2": 4,
24
+ "reinit_target": "y",
25
+ "sparse_adam": false,
26
+ "run_template": "M{layer_idx}_S{l1_exp}_R{l1_ratio}_P{l1_p}",
27
+ "project_name": "mlp_map_test",
28
+ "decoder_bias": true,
29
+ "l1_beta": 0.99,
30
+ "alt_sparsity_loss": "log",
31
+ "l1_ratio": 4,
32
+ "l1_p": 6,
33
+ "optimizer": "sparse_adam",
34
+ "model_type": "mlp_map",
35
+ "adam_beta1": 0.5,
36
+ "adam_beta2": 0.9375,
37
+ "run_name": "M0_S0_R4_P6"
38
+ }
mlp_map_test/M0_S0_R8_P4.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dfd70e40905225d54508805bec0bf51bd96dc77812a19df653ed398cf10c9731
3
+ size 153705072
mlp_map_test/M0_S0_R8_P4_config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "n_features": 25000,
3
+ "d_model": 768,
4
+ "lr_exp": -10,
5
+ "disable_comet": false,
6
+ "per_neuron_reinit_interval": 0,
7
+ "reservoir_time_discount": 0.995,
8
+ "reinit_interval": 800,
9
+ "max_reinit_neurons": 5000,
10
+ "reservoir_size": 5000,
11
+ "n_piles": 292,
12
+ "log_interval": 200,
13
+ "reinit_input_norm": "target_scaled",
14
+ "reinit_input": "x",
15
+ "reinit_norm_alpha": 0.3,
16
+ "data_loc": "mlp_data",
17
+ "reinit_threshold": -6,
18
+ "scheduler": "wsd",
19
+ "layer_idx": 0,
20
+ "l1_exp": 0,
21
+ "neuron_reinit_percent": 0.85,
22
+ "beta1": 1,
23
+ "beta2": 4,
24
+ "reinit_target": "y",
25
+ "sparse_adam": false,
26
+ "run_template": "M{layer_idx}_S{l1_exp}_R{l1_ratio}_P{l1_p}",
27
+ "project_name": "mlp_map_test",
28
+ "decoder_bias": true,
29
+ "l1_beta": 0.99,
30
+ "alt_sparsity_loss": "log",
31
+ "l1_ratio": 8,
32
+ "l1_p": 4,
33
+ "optimizer": "sparse_adam",
34
+ "model_type": "mlp_map",
35
+ "adam_beta1": 0.5,
36
+ "adam_beta2": 0.9375,
37
+ "run_name": "M0_S0_R8_P4"
38
+ }
mlp_map_test/M1_S-2_R16_P2.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:37b722f62ac9871daec727c03512026863cf1b8a896c7112ea7678163b4da5f1
3
+ size 153705088
mlp_map_test/M1_S-2_R16_P2_config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "n_features": 25000,
3
+ "d_model": 768,
4
+ "lr_exp": -10,
5
+ "disable_comet": false,
6
+ "per_neuron_reinit_interval": 0,
7
+ "reservoir_time_discount": 0.995,
8
+ "reinit_interval": 800,
9
+ "max_reinit_neurons": 5000,
10
+ "reservoir_size": 5000,
11
+ "n_piles": 292,
12
+ "log_interval": 200,
13
+ "reinit_input_norm": "target_scaled",
14
+ "reinit_input": "x",
15
+ "reinit_norm_alpha": 0.3,
16
+ "data_loc": "mlp_data",
17
+ "reinit_threshold": -6,
18
+ "scheduler": "wsd",
19
+ "layer_idx": 1,
20
+ "l1_exp": -2,
21
+ "neuron_reinit_percent": 0.85,
22
+ "beta1": 1,
23
+ "beta2": 4,
24
+ "reinit_target": "y",
25
+ "sparse_adam": false,
26
+ "run_template": "M{layer_idx}_S{l1_exp}_R{l1_ratio}_P{l1_p}",
27
+ "project_name": "mlp_map_test",
28
+ "decoder_bias": true,
29
+ "l1_beta": 0.99,
30
+ "alt_sparsity_loss": "log",
31
+ "l1_ratio": 16,
32
+ "l1_p": 2,
33
+ "optimizer": "sparse_adam",
34
+ "model_type": "mlp_map",
35
+ "adam_beta1": 0.5,
36
+ "adam_beta2": 0.9375,
37
+ "run_name": "M1_S-2_R16_P2"
38
+ }
mlp_map_test/M1_S-2_R2_P3.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ba4c4a14c610deb1cd5583f68e1ec84b5aa63a89b3dd36fd49e49f5a775467b3
3
+ size 153705080
mlp_map_test/M1_S-2_R2_P3_config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "n_features": 25000,
3
+ "d_model": 768,
4
+ "lr_exp": -10,
5
+ "disable_comet": false,
6
+ "per_neuron_reinit_interval": 0,
7
+ "reservoir_time_discount": 0.995,
8
+ "reinit_interval": 800,
9
+ "max_reinit_neurons": 5000,
10
+ "reservoir_size": 5000,
11
+ "n_piles": 292,
12
+ "log_interval": 200,
13
+ "reinit_input_norm": "target_scaled",
14
+ "reinit_input": "x",
15
+ "reinit_norm_alpha": 0.3,
16
+ "data_loc": "mlp_data",
17
+ "reinit_threshold": -6,
18
+ "scheduler": "wsd",
19
+ "layer_idx": 1,
20
+ "l1_exp": -2,
21
+ "neuron_reinit_percent": 0.85,
22
+ "beta1": 1,
23
+ "beta2": 4,
24
+ "reinit_target": "y",
25
+ "sparse_adam": false,
26
+ "run_template": "M{layer_idx}_S{l1_exp}_R{l1_ratio}_P{l1_p}",
27
+ "project_name": "mlp_map_test",
28
+ "decoder_bias": true,
29
+ "l1_beta": 0.99,
30
+ "alt_sparsity_loss": "log",
31
+ "l1_ratio": 2,
32
+ "l1_p": 3,
33
+ "optimizer": "sparse_adam",
34
+ "model_type": "mlp_map",
35
+ "adam_beta1": 0.5,
36
+ "adam_beta2": 0.9375,
37
+ "run_name": "M1_S-2_R2_P3"
38
+ }
mlp_map_test/M1_S-4_R16_P1.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:42beb86f03a41ac4830dd421a5ce373d75b39188bd2712cbefaa003fab9e6279
3
+ size 153705088
mlp_map_test/M1_S-4_R16_P1_config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "n_features": 25000,
3
+ "d_model": 768,
4
+ "lr_exp": -10,
5
+ "disable_comet": false,
6
+ "per_neuron_reinit_interval": 0,
7
+ "reservoir_time_discount": 0.995,
8
+ "reinit_interval": 800,
9
+ "max_reinit_neurons": 5000,
10
+ "reservoir_size": 5000,
11
+ "n_piles": 292,
12
+ "log_interval": 200,
13
+ "reinit_input_norm": "target_scaled",
14
+ "reinit_input": "x",
15
+ "reinit_norm_alpha": 0.3,
16
+ "data_loc": "mlp_data",
17
+ "reinit_threshold": -6,
18
+ "scheduler": "wsd",
19
+ "layer_idx": 1,
20
+ "l1_exp": -4,
21
+ "neuron_reinit_percent": 0.85,
22
+ "beta1": 1,
23
+ "beta2": 4,
24
+ "reinit_target": "y",
25
+ "sparse_adam": false,
26
+ "run_template": "M{layer_idx}_S{l1_exp}_R{l1_ratio}_P{l1_p}",
27
+ "project_name": "mlp_map_test",
28
+ "decoder_bias": true,
29
+ "l1_beta": 0.99,
30
+ "alt_sparsity_loss": "log",
31
+ "l1_ratio": 16,
32
+ "l1_p": 1,
33
+ "optimizer": "sparse_adam",
34
+ "model_type": "mlp_map",
35
+ "adam_beta1": 0.5,
36
+ "adam_beta2": 0.9375,
37
+ "run_name": "M1_S-4_R16_P1"
38
+ }
mlp_map_test/M1_S-4_R2_P6.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f729f2319c7d471c0029acb1022a93276bb159bb8815cda5bc66f2cb529da19e
3
+ size 153705080
mlp_map_test/M1_S-4_R2_P6_config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "n_features": 25000,
3
+ "d_model": 768,
4
+ "lr_exp": -10,
5
+ "disable_comet": false,
6
+ "per_neuron_reinit_interval": 0,
7
+ "reservoir_time_discount": 0.995,
8
+ "reinit_interval": 800,
9
+ "max_reinit_neurons": 5000,
10
+ "reservoir_size": 5000,
11
+ "n_piles": 292,
12
+ "log_interval": 200,
13
+ "reinit_input_norm": "target_scaled",
14
+ "reinit_input": "x",
15
+ "reinit_norm_alpha": 0.3,
16
+ "data_loc": "mlp_data",
17
+ "reinit_threshold": -6,
18
+ "scheduler": "wsd",
19
+ "layer_idx": 1,
20
+ "l1_exp": -4,
21
+ "neuron_reinit_percent": 0.85,
22
+ "beta1": 1,
23
+ "beta2": 4,
24
+ "reinit_target": "y",
25
+ "sparse_adam": false,
26
+ "run_template": "M{layer_idx}_S{l1_exp}_R{l1_ratio}_P{l1_p}",
27
+ "project_name": "mlp_map_test",
28
+ "decoder_bias": true,
29
+ "l1_beta": 0.99,
30
+ "alt_sparsity_loss": "log",
31
+ "l1_ratio": 2,
32
+ "l1_p": 6,
33
+ "optimizer": "sparse_adam",
34
+ "model_type": "mlp_map",
35
+ "adam_beta1": 0.5,
36
+ "adam_beta2": 0.9375,
37
+ "run_name": "M1_S-4_R2_P6"
38
+ }
mlp_map_test/M1_S-4_R4_P3.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bf90a29a6f0befb83974f530c3c2497ca54df69e4da67e8f285b83ddacea492f
3
+ size 153705080
mlp_map_test/M1_S-4_R4_P3_config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "n_features": 25000,
3
+ "d_model": 768,
4
+ "lr_exp": -10,
5
+ "disable_comet": false,
6
+ "per_neuron_reinit_interval": 0,
7
+ "reservoir_time_discount": 0.995,
8
+ "reinit_interval": 800,
9
+ "max_reinit_neurons": 5000,
10
+ "reservoir_size": 5000,
11
+ "n_piles": 292,
12
+ "log_interval": 200,
13
+ "reinit_input_norm": "target_scaled",
14
+ "reinit_input": "x",
15
+ "reinit_norm_alpha": 0.3,
16
+ "data_loc": "mlp_data",
17
+ "reinit_threshold": -6,
18
+ "scheduler": "wsd",
19
+ "layer_idx": 1,
20
+ "l1_exp": -4,
21
+ "neuron_reinit_percent": 0.85,
22
+ "beta1": 1,
23
+ "beta2": 4,
24
+ "reinit_target": "y",
25
+ "sparse_adam": false,
26
+ "run_template": "M{layer_idx}_S{l1_exp}_R{l1_ratio}_P{l1_p}",
27
+ "project_name": "mlp_map_test",
28
+ "decoder_bias": true,
29
+ "l1_beta": 0.99,
30
+ "alt_sparsity_loss": "log",
31
+ "l1_ratio": 4,
32
+ "l1_p": 3,
33
+ "optimizer": "sparse_adam",
34
+ "model_type": "mlp_map",
35
+ "adam_beta1": 0.5,
36
+ "adam_beta2": 0.9375,
37
+ "run_name": "M1_S-4_R4_P3"
38
+ }
mlp_map_test/M1_S-4_R8_P2.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8ccbd7776c7e2f458d1dda3932307757741dd39f221daa7b0bf8ff94284f296d
3
+ size 153705080
mlp_map_test/M1_S-4_R8_P2_config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "n_features": 25000,
3
+ "d_model": 768,
4
+ "lr_exp": -10,
5
+ "disable_comet": false,
6
+ "per_neuron_reinit_interval": 0,
7
+ "reservoir_time_discount": 0.995,
8
+ "reinit_interval": 800,
9
+ "max_reinit_neurons": 5000,
10
+ "reservoir_size": 5000,
11
+ "n_piles": 292,
12
+ "log_interval": 200,
13
+ "reinit_input_norm": "target_scaled",
14
+ "reinit_input": "x",
15
+ "reinit_norm_alpha": 0.3,
16
+ "data_loc": "mlp_data",
17
+ "reinit_threshold": -6,
18
+ "scheduler": "wsd",
19
+ "layer_idx": 1,
20
+ "l1_exp": -4,
21
+ "neuron_reinit_percent": 0.85,
22
+ "beta1": 1,
23
+ "beta2": 4,
24
+ "reinit_target": "y",
25
+ "sparse_adam": false,
26
+ "run_template": "M{layer_idx}_S{l1_exp}_R{l1_ratio}_P{l1_p}",
27
+ "project_name": "mlp_map_test",
28
+ "decoder_bias": true,
29
+ "l1_beta": 0.99,
30
+ "alt_sparsity_loss": "log",
31
+ "l1_ratio": 8,
32
+ "l1_p": 2,
33
+ "optimizer": "sparse_adam",
34
+ "model_type": "mlp_map",
35
+ "adam_beta1": 0.5,
36
+ "adam_beta2": 0.9375,
37
+ "run_name": "M1_S-4_R8_P2"
38
+ }