Jeska committed on
Commit
78dcc46
·
1 Parent(s): ec96d53

End of training

Browse files
Files changed (4) hide show
  1. all_results.json +11 -11
  2. eval_results.json +6 -6
  3. train_results.json +5 -5
  4. trainer_state.json +76 -28
all_results.json CHANGED
@@ -1,14 +1,14 @@
1
  {
2
  "epoch": 3.0,
3
- "eval_loss": 2.030430555343628,
4
- "eval_runtime": 15.4256,
5
- "eval_samples": 1000,
6
- "eval_samples_per_second": 64.827,
7
- "eval_steps_per_second": 8.103,
8
- "perplexity": 7.617365350191575,
9
- "train_loss": 2.1000711460306185,
10
- "train_runtime": 3427.7755,
11
- "train_samples": 19004,
12
- "train_samples_per_second": 16.632,
13
- "train_steps_per_second": 0.26
14
  }
 
1
  {
2
  "epoch": 3.0,
3
+ "eval_loss": 1.920763373374939,
4
+ "eval_runtime": 81.4759,
5
+ "eval_samples": 5193,
6
+ "eval_samples_per_second": 63.737,
7
+ "eval_steps_per_second": 7.978,
8
+ "perplexity": 6.826167395302532,
9
+ "train_loss": 2.110191111022448,
10
+ "train_runtime": 17635.5568,
11
+ "train_samples": 98673,
12
+ "train_samples_per_second": 16.785,
13
+ "train_steps_per_second": 0.262
14
  }
eval_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "epoch": 3.0,
3
- "eval_loss": 2.030430555343628,
4
- "eval_runtime": 15.4256,
5
- "eval_samples": 1000,
6
- "eval_samples_per_second": 64.827,
7
- "eval_steps_per_second": 8.103,
8
- "perplexity": 7.617365350191575
9
  }
 
1
  {
2
  "epoch": 3.0,
3
+ "eval_loss": 1.920763373374939,
4
+ "eval_runtime": 81.4759,
5
+ "eval_samples": 5193,
6
+ "eval_samples_per_second": 63.737,
7
+ "eval_steps_per_second": 7.978,
8
+ "perplexity": 6.826167395302532
9
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 3.0,
3
- "train_loss": 2.1000711460306185,
4
- "train_runtime": 3427.7755,
5
- "train_samples": 19004,
6
- "train_samples_per_second": 16.632,
7
- "train_steps_per_second": 0.26
8
  }
 
1
  {
2
  "epoch": 3.0,
3
+ "train_loss": 2.110191111022448,
4
+ "train_runtime": 17635.5568,
5
+ "train_samples": 98673,
6
+ "train_samples_per_second": 16.785,
7
+ "train_steps_per_second": 0.262
8
  }
trainer_state.json CHANGED
@@ -2,54 +2,102 @@
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
  "epoch": 3.0,
5
- "global_step": 891,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
  {
11
  "epoch": 1.0,
12
- "eval_loss": 2.170140027999878,
13
- "eval_runtime": 15.6956,
14
- "eval_samples_per_second": 63.712,
15
- "eval_steps_per_second": 7.964,
16
- "step": 297
17
  },
18
  {
19
- "epoch": 1.68,
20
- "learning_rate": 8.77665544332211e-06,
21
- "loss": 2.1401,
22
- "step": 500
 
 
 
 
 
 
 
 
 
 
 
 
23
  },
24
  {
25
  "epoch": 2.0,
26
- "eval_loss": 2.0152316093444824,
27
- "eval_runtime": 15.6747,
28
- "eval_samples_per_second": 63.797,
29
- "eval_steps_per_second": 7.975,
30
- "step": 594
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31
  },
32
  {
33
  "epoch": 3.0,
34
- "eval_loss": 2.1160194873809814,
35
- "eval_runtime": 15.6683,
36
- "eval_samples_per_second": 63.823,
37
- "eval_steps_per_second": 7.978,
38
- "step": 891
39
  },
40
  {
41
  "epoch": 3.0,
42
- "step": 891,
43
- "total_flos": 1.5005754149105664e+16,
44
- "train_loss": 2.1000711460306185,
45
- "train_runtime": 3427.7755,
46
- "train_samples_per_second": 16.632,
47
- "train_steps_per_second": 0.26
48
  }
49
  ],
50
- "max_steps": 891,
51
  "num_train_epochs": 3,
52
- "total_flos": 1.5005754149105664e+16,
53
  "trial_name": null,
54
  "trial_params": null
55
  }
 
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
  "epoch": 3.0,
5
+ "global_step": 4626,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
+ {
11
+ "epoch": 0.32,
12
+ "learning_rate": 1.7838305231301342e-05,
13
+ "loss": 2.2294,
14
+ "step": 500
15
+ },
16
+ {
17
+ "epoch": 0.65,
18
+ "learning_rate": 1.5676610462602683e-05,
19
+ "loss": 2.1683,
20
+ "step": 1000
21
+ },
22
+ {
23
+ "epoch": 0.97,
24
+ "learning_rate": 1.3514915693904023e-05,
25
+ "loss": 2.1713,
26
+ "step": 1500
27
+ },
28
  {
29
  "epoch": 1.0,
30
+ "eval_loss": 2.009819507598877,
31
+ "eval_runtime": 82.235,
32
+ "eval_samples_per_second": 63.148,
33
+ "eval_steps_per_second": 7.904,
34
+ "step": 1542
35
  },
36
  {
37
+ "epoch": 1.3,
38
+ "learning_rate": 1.1353220925205362e-05,
39
+ "loss": 2.1142,
40
+ "step": 2000
41
+ },
42
+ {
43
+ "epoch": 1.62,
44
+ "learning_rate": 9.191526156506702e-06,
45
+ "loss": 2.0871,
46
+ "step": 2500
47
+ },
48
+ {
49
+ "epoch": 1.95,
50
+ "learning_rate": 7.029831387808041e-06,
51
+ "loss": 2.0736,
52
+ "step": 3000
53
  },
54
  {
55
  "epoch": 2.0,
56
+ "eval_loss": 1.9852919578552246,
57
+ "eval_runtime": 82.023,
58
+ "eval_samples_per_second": 63.312,
59
+ "eval_steps_per_second": 7.925,
60
+ "step": 3084
61
+ },
62
+ {
63
+ "epoch": 2.27,
64
+ "learning_rate": 4.8681366191093824e-06,
65
+ "loss": 2.0674,
66
+ "step": 3500
67
+ },
68
+ {
69
+ "epoch": 2.59,
70
+ "learning_rate": 2.706441850410722e-06,
71
+ "loss": 2.0482,
72
+ "step": 4000
73
+ },
74
+ {
75
+ "epoch": 2.92,
76
+ "learning_rate": 5.447470817120623e-07,
77
+ "loss": 2.0543,
78
+ "step": 4500
79
  },
80
  {
81
  "epoch": 3.0,
82
+ "eval_loss": 2.013369083404541,
83
+ "eval_runtime": 82.0996,
84
+ "eval_samples_per_second": 63.252,
85
+ "eval_steps_per_second": 7.917,
86
+ "step": 4626
87
  },
88
  {
89
  "epoch": 3.0,
90
+ "step": 4626,
91
+ "total_flos": 7.791321717294797e+16,
92
+ "train_loss": 2.110191111022448,
93
+ "train_runtime": 17635.5568,
94
+ "train_samples_per_second": 16.785,
95
+ "train_steps_per_second": 0.262
96
  }
97
  ],
98
+ "max_steps": 4626,
99
  "num_train_epochs": 3,
100
+ "total_flos": 7.791321717294797e+16,
101
  "trial_name": null,
102
  "trial_params": null
103
  }