Jeska committed on
Commit
bdb81f8
·
1 Parent(s): e2081c9

End of training

Browse files
Files changed (4) hide show
  1. all_results.json +10 -10
  2. eval_results.json +6 -6
  3. train_results.json +5 -5
  4. trainer_state.json +38 -16
all_results.json CHANGED
@@ -1,14 +1,14 @@
1
  {
2
- "epoch": 1.0,
3
- "eval_loss": 2.143171787261963,
4
- "eval_runtime": 15.5445,
5
  "eval_samples": 1000,
6
- "eval_samples_per_second": 64.332,
7
- "eval_steps_per_second": 8.041,
8
- "perplexity": 8.52643883529601,
9
- "train_loss": 2.1925529711174243,
10
- "train_runtime": 1129.3473,
11
  "train_samples": 19004,
12
- "train_samples_per_second": 16.827,
13
- "train_steps_per_second": 0.263
14
  }
 
1
  {
2
+ "epoch": 3.0,
3
+ "eval_loss": 2.030430555343628,
4
+ "eval_runtime": 15.4256,
5
  "eval_samples": 1000,
6
+ "eval_samples_per_second": 64.827,
7
+ "eval_steps_per_second": 8.103,
8
+ "perplexity": 7.617365350191575,
9
+ "train_loss": 2.1000711460306185,
10
+ "train_runtime": 3427.7755,
11
  "train_samples": 19004,
12
+ "train_samples_per_second": 16.632,
13
+ "train_steps_per_second": 0.26
14
  }
eval_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "epoch": 1.0,
3
- "eval_loss": 2.143171787261963,
4
- "eval_runtime": 15.5445,
5
  "eval_samples": 1000,
6
- "eval_samples_per_second": 64.332,
7
- "eval_steps_per_second": 8.041,
8
- "perplexity": 8.52643883529601
9
  }
 
1
  {
2
+ "epoch": 3.0,
3
+ "eval_loss": 2.030430555343628,
4
+ "eval_runtime": 15.4256,
5
  "eval_samples": 1000,
6
+ "eval_samples_per_second": 64.827,
7
+ "eval_steps_per_second": 8.103,
8
+ "perplexity": 7.617365350191575
9
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 1.0,
3
- "train_loss": 2.1925529711174243,
4
- "train_runtime": 1129.3473,
5
  "train_samples": 19004,
6
- "train_samples_per_second": 16.827,
7
- "train_steps_per_second": 0.263
8
  }
 
1
  {
2
+ "epoch": 3.0,
3
+ "train_loss": 2.1000711460306185,
4
+ "train_runtime": 3427.7755,
5
  "train_samples": 19004,
6
+ "train_samples_per_second": 16.632,
7
+ "train_steps_per_second": 0.26
8
  }
trainer_state.json CHANGED
@@ -1,33 +1,55 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.0,
5
- "global_step": 297,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
  "epoch": 1.0,
12
- "eval_loss": 2.1746253967285156,
13
- "eval_runtime": 15.7469,
14
- "eval_samples_per_second": 63.505,
15
- "eval_steps_per_second": 7.938,
16
  "step": 297
17
  },
18
  {
19
- "epoch": 1.0,
20
- "step": 297,
21
- "total_flos": 5001918049701888.0,
22
- "train_loss": 2.1925529711174243,
23
- "train_runtime": 1129.3473,
24
- "train_samples_per_second": 16.827,
25
- "train_steps_per_second": 0.263
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
  }
27
  ],
28
- "max_steps": 297,
29
- "num_train_epochs": 1,
30
- "total_flos": 5001918049701888.0,
31
  "trial_name": null,
32
  "trial_params": null
33
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 3.0,
5
+ "global_step": 891,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
  "epoch": 1.0,
12
+ "eval_loss": 2.170140027999878,
13
+ "eval_runtime": 15.6956,
14
+ "eval_samples_per_second": 63.712,
15
+ "eval_steps_per_second": 7.964,
16
  "step": 297
17
  },
18
  {
19
+ "epoch": 1.68,
20
+ "learning_rate": 8.77665544332211e-06,
21
+ "loss": 2.1401,
22
+ "step": 500
23
+ },
24
+ {
25
+ "epoch": 2.0,
26
+ "eval_loss": 2.0152316093444824,
27
+ "eval_runtime": 15.6747,
28
+ "eval_samples_per_second": 63.797,
29
+ "eval_steps_per_second": 7.975,
30
+ "step": 594
31
+ },
32
+ {
33
+ "epoch": 3.0,
34
+ "eval_loss": 2.1160194873809814,
35
+ "eval_runtime": 15.6683,
36
+ "eval_samples_per_second": 63.823,
37
+ "eval_steps_per_second": 7.978,
38
+ "step": 891
39
+ },
40
+ {
41
+ "epoch": 3.0,
42
+ "step": 891,
43
+ "total_flos": 1.5005754149105664e+16,
44
+ "train_loss": 2.1000711460306185,
45
+ "train_runtime": 3427.7755,
46
+ "train_samples_per_second": 16.632,
47
+ "train_steps_per_second": 0.26
48
  }
49
  ],
50
+ "max_steps": 891,
51
+ "num_train_epochs": 3,
52
+ "total_flos": 1.5005754149105664e+16,
53
  "trial_name": null,
54
  "trial_params": null
55
  }