Jeska committed on
Commit
1918f07
·
1 Parent(s): 551ffec

End of training

Browse files
all_results.json CHANGED
@@ -1,14 +1,14 @@
1
  {
2
  "epoch": 1.0,
3
- "eval_loss": 2.0326364040374756,
4
- "eval_runtime": 2556.1573,
5
  "eval_samples": 1000,
6
- "eval_samples_per_second": 0.391,
7
- "eval_steps_per_second": 0.049,
8
- "perplexity": 7.634186651401794,
9
- "train_loss": 2.207404653632681,
10
- "train_runtime": 238732.8731,
11
  "train_samples": 19004,
12
- "train_samples_per_second": 0.08,
13
- "train_steps_per_second": 0.001
14
  }
 
1
  {
2
  "epoch": 1.0,
3
+ "eval_loss": 2.143171787261963,
4
+ "eval_runtime": 15.5445,
5
  "eval_samples": 1000,
6
+ "eval_samples_per_second": 64.332,
7
+ "eval_steps_per_second": 8.041,
8
+ "perplexity": 8.52643883529601,
9
+ "train_loss": 2.1925529711174243,
10
+ "train_runtime": 1129.3473,
11
  "train_samples": 19004,
12
+ "train_samples_per_second": 16.827,
13
+ "train_steps_per_second": 0.263
14
  }
eval_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "epoch": 1.0,
3
- "eval_loss": 2.0326364040374756,
4
- "eval_runtime": 2556.1573,
5
  "eval_samples": 1000,
6
- "eval_samples_per_second": 0.391,
7
- "eval_steps_per_second": 0.049,
8
- "perplexity": 7.634186651401794
9
  }
 
1
  {
2
  "epoch": 1.0,
3
+ "eval_loss": 2.143171787261963,
4
+ "eval_runtime": 15.5445,
5
  "eval_samples": 1000,
6
+ "eval_samples_per_second": 64.332,
7
+ "eval_steps_per_second": 8.041,
8
+ "perplexity": 8.52643883529601
9
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 1.0,
3
- "train_loss": 2.207404653632681,
4
- "train_runtime": 238732.8731,
5
  "train_samples": 19004,
6
- "train_samples_per_second": 0.08,
7
- "train_steps_per_second": 0.001
8
  }
 
1
  {
2
  "epoch": 1.0,
3
+ "train_loss": 2.1925529711174243,
4
+ "train_runtime": 1129.3473,
5
  "train_samples": 19004,
6
+ "train_samples_per_second": 16.827,
7
+ "train_steps_per_second": 0.263
8
  }
trainer_state.json CHANGED
@@ -9,20 +9,20 @@
9
  "log_history": [
10
  {
11
  "epoch": 1.0,
12
- "eval_loss": 2.0545575618743896,
13
- "eval_runtime": 2627.6485,
14
- "eval_samples_per_second": 0.381,
15
- "eval_steps_per_second": 0.048,
16
  "step": 297
17
  },
18
  {
19
  "epoch": 1.0,
20
  "step": 297,
21
  "total_flos": 5001918049701888.0,
22
- "train_loss": 2.207404653632681,
23
- "train_runtime": 238732.8731,
24
- "train_samples_per_second": 0.08,
25
- "train_steps_per_second": 0.001
26
  }
27
  ],
28
  "max_steps": 297,
 
9
  "log_history": [
10
  {
11
  "epoch": 1.0,
12
+ "eval_loss": 2.1746253967285156,
13
+ "eval_runtime": 15.7469,
14
+ "eval_samples_per_second": 63.505,
15
+ "eval_steps_per_second": 7.938,
16
  "step": 297
17
  },
18
  {
19
  "epoch": 1.0,
20
  "step": 297,
21
  "total_flos": 5001918049701888.0,
22
+ "train_loss": 2.1925529711174243,
23
+ "train_runtime": 1129.3473,
24
+ "train_samples_per_second": 16.827,
25
+ "train_steps_per_second": 0.263
26
  }
27
  ],
28
  "max_steps": 297,