End of training
Browse files
all_results.json
CHANGED
@@ -2,13 +2,13 @@
|
|
2 |
"epoch": 15.0,
|
3 |
"eval_accuracy": 0.9067641496658325,
|
4 |
"eval_loss": 0.4963526129722595,
|
5 |
-
"eval_runtime": 2.
|
6 |
"eval_samples": 1094,
|
7 |
-
"eval_samples_per_second": 507.
|
8 |
-
"eval_steps_per_second": 63.
|
9 |
"train_loss": 0.7935866661746093,
|
10 |
-
"train_runtime":
|
11 |
"train_samples": 10556,
|
12 |
-
"train_samples_per_second":
|
13 |
-
"train_steps_per_second": 9.
|
14 |
}
|
|
|
2 |
"epoch": 15.0,
|
3 |
"eval_accuracy": 0.9067641496658325,
|
4 |
"eval_loss": 0.4963526129722595,
|
5 |
+
"eval_runtime": 2.1572,
|
6 |
"eval_samples": 1094,
|
7 |
+
"eval_samples_per_second": 507.137,
|
8 |
+
"eval_steps_per_second": 63.508,
|
9 |
"train_loss": 0.7935866661746093,
|
10 |
+
"train_runtime": 2196.4523,
|
11 |
"train_samples": 10556,
|
12 |
+
"train_samples_per_second": 72.089,
|
13 |
+
"train_steps_per_second": 9.015
|
14 |
}
|
eval_results.json
CHANGED
@@ -2,8 +2,8 @@
|
|
2 |
"epoch": 15.0,
|
3 |
"eval_accuracy": 0.9067641496658325,
|
4 |
"eval_loss": 0.4963526129722595,
|
5 |
-
"eval_runtime": 2.
|
6 |
"eval_samples": 1094,
|
7 |
-
"eval_samples_per_second": 507.
|
8 |
-
"eval_steps_per_second": 63.
|
9 |
}
|
|
|
2 |
"epoch": 15.0,
|
3 |
"eval_accuracy": 0.9067641496658325,
|
4 |
"eval_loss": 0.4963526129722595,
|
5 |
+
"eval_runtime": 2.1572,
|
6 |
"eval_samples": 1094,
|
7 |
+
"eval_samples_per_second": 507.137,
|
8 |
+
"eval_steps_per_second": 63.508
|
9 |
}
|
runs/Dec15_13-02-52_jbuhmann/events.out.tfevents.1639573387.jbuhmann.27644.0
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:81dc8d880fb7bfe8bf848cf48003ee2dfcbe1431c695df973adbccb0603994f2
|
3 |
+
size 27801
|
runs/Dec15_13-02-52_jbuhmann/events.out.tfevents.1639575587.jbuhmann.27644.2
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1b1002843e24880cba7c24beedfff50921e1c6eec0957b6ad3bc3f049f6dc8e1
|
3 |
+
size 369
|
train_results.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"epoch": 15.0,
|
3 |
"train_loss": 0.7935866661746093,
|
4 |
-
"train_runtime":
|
5 |
"train_samples": 10556,
|
6 |
-
"train_samples_per_second":
|
7 |
-
"train_steps_per_second": 9.
|
8 |
}
|
|
|
1 |
{
|
2 |
"epoch": 15.0,
|
3 |
"train_loss": 0.7935866661746093,
|
4 |
+
"train_runtime": 2196.4523,
|
5 |
"train_samples": 10556,
|
6 |
+
"train_samples_per_second": 72.089,
|
7 |
+
"train_steps_per_second": 9.015
|
8 |
}
|
trainer_state.json
CHANGED
@@ -23,9 +23,9 @@
|
|
23 |
"epoch": 1.0,
|
24 |
"eval_accuracy": 0.4232175648212433,
|
25 |
"eval_loss": 3.169081211090088,
|
26 |
-
"eval_runtime": 2.
|
27 |
-
"eval_samples_per_second":
|
28 |
-
"eval_steps_per_second": 63.
|
29 |
"step": 1320
|
30 |
},
|
31 |
{
|
@@ -50,9 +50,9 @@
|
|
50 |
"epoch": 2.0,
|
51 |
"eval_accuracy": 0.6435100436210632,
|
52 |
"eval_loss": 2.04946231842041,
|
53 |
-
"eval_runtime": 2.
|
54 |
-
"eval_samples_per_second":
|
55 |
-
"eval_steps_per_second":
|
56 |
"step": 2640
|
57 |
},
|
58 |
{
|
@@ -71,9 +71,9 @@
|
|
71 |
"epoch": 3.0,
|
72 |
"eval_accuracy": 0.7861060500144958,
|
73 |
"eval_loss": 1.3566279411315918,
|
74 |
-
"eval_runtime": 2.
|
75 |
-
"eval_samples_per_second":
|
76 |
-
"eval_steps_per_second":
|
77 |
"step": 3960
|
78 |
},
|
79 |
{
|
@@ -98,9 +98,9 @@
|
|
98 |
"epoch": 4.0,
|
99 |
"eval_accuracy": 0.8555758595466614,
|
100 |
"eval_loss": 0.9564884305000305,
|
101 |
-
"eval_runtime": 2.
|
102 |
-
"eval_samples_per_second":
|
103 |
-
"eval_steps_per_second":
|
104 |
"step": 5280
|
105 |
},
|
106 |
{
|
@@ -125,9 +125,9 @@
|
|
125 |
"epoch": 5.0,
|
126 |
"eval_accuracy": 0.880255937576294,
|
127 |
"eval_loss": 0.7118942141532898,
|
128 |
-
"eval_runtime": 2.
|
129 |
-
"eval_samples_per_second":
|
130 |
-
"eval_steps_per_second":
|
131 |
"step": 6600
|
132 |
},
|
133 |
{
|
@@ -146,9 +146,9 @@
|
|
146 |
"epoch": 6.0,
|
147 |
"eval_accuracy": 0.8912248611450195,
|
148 |
"eval_loss": 0.5804843902587891,
|
149 |
-
"eval_runtime": 2.
|
150 |
-
"eval_samples_per_second":
|
151 |
-
"eval_steps_per_second":
|
152 |
"step": 7920
|
153 |
},
|
154 |
{
|
@@ -173,9 +173,9 @@
|
|
173 |
"epoch": 7.0,
|
174 |
"eval_accuracy": 0.8994515538215637,
|
175 |
"eval_loss": 0.5123510956764221,
|
176 |
-
"eval_runtime": 2.
|
177 |
-
"eval_samples_per_second":
|
178 |
-
"eval_steps_per_second":
|
179 |
"step": 9240
|
180 |
},
|
181 |
{
|
@@ -200,9 +200,9 @@
|
|
200 |
"epoch": 8.0,
|
201 |
"eval_accuracy": 0.9021937847137451,
|
202 |
"eval_loss": 0.49525973200798035,
|
203 |
-
"eval_runtime": 2.
|
204 |
-
"eval_samples_per_second":
|
205 |
-
"eval_steps_per_second":
|
206 |
"step": 10560
|
207 |
},
|
208 |
{
|
@@ -221,9 +221,9 @@
|
|
221 |
"epoch": 9.0,
|
222 |
"eval_accuracy": 0.9021937847137451,
|
223 |
"eval_loss": 0.486285001039505,
|
224 |
-
"eval_runtime": 2.
|
225 |
-
"eval_samples_per_second":
|
226 |
-
"eval_steps_per_second":
|
227 |
"step": 11880
|
228 |
},
|
229 |
{
|
@@ -248,9 +248,9 @@
|
|
248 |
"epoch": 10.0,
|
249 |
"eval_accuracy": 0.9049360156059265,
|
250 |
"eval_loss": 0.48929038643836975,
|
251 |
-
"eval_runtime": 2.
|
252 |
-
"eval_samples_per_second":
|
253 |
-
"eval_steps_per_second": 66.
|
254 |
"step": 13200
|
255 |
},
|
256 |
{
|
@@ -275,9 +275,9 @@
|
|
275 |
"epoch": 11.0,
|
276 |
"eval_accuracy": 0.9076782464981079,
|
277 |
"eval_loss": 0.485994428396225,
|
278 |
-
"eval_runtime": 2.
|
279 |
-
"eval_samples_per_second":
|
280 |
-
"eval_steps_per_second":
|
281 |
"step": 14520
|
282 |
},
|
283 |
{
|
@@ -296,9 +296,9 @@
|
|
296 |
"epoch": 12.0,
|
297 |
"eval_accuracy": 0.9058501124382019,
|
298 |
"eval_loss": 0.4888700246810913,
|
299 |
-
"eval_runtime": 2.
|
300 |
-
"eval_samples_per_second":
|
301 |
-
"eval_steps_per_second":
|
302 |
"step": 15840
|
303 |
},
|
304 |
{
|
@@ -323,9 +323,9 @@
|
|
323 |
"epoch": 13.0,
|
324 |
"eval_accuracy": 0.9067641496658325,
|
325 |
"eval_loss": 0.4926171898841858,
|
326 |
-
"eval_runtime": 2.
|
327 |
-
"eval_samples_per_second":
|
328 |
-
"eval_steps_per_second":
|
329 |
"step": 17160
|
330 |
},
|
331 |
{
|
@@ -344,9 +344,9 @@
|
|
344 |
"epoch": 14.0,
|
345 |
"eval_accuracy": 0.9067641496658325,
|
346 |
"eval_loss": 0.495064914226532,
|
347 |
-
"eval_runtime": 2.
|
348 |
-
"eval_samples_per_second":
|
349 |
-
"eval_steps_per_second":
|
350 |
"step": 18480
|
351 |
},
|
352 |
{
|
@@ -371,9 +371,9 @@
|
|
371 |
"epoch": 15.0,
|
372 |
"eval_accuracy": 0.9067641496658325,
|
373 |
"eval_loss": 0.4963526129722595,
|
374 |
-
"eval_runtime": 2.
|
375 |
-
"eval_samples_per_second":
|
376 |
-
"eval_steps_per_second": 66.
|
377 |
"step": 19800
|
378 |
},
|
379 |
{
|
@@ -381,9 +381,9 @@
|
|
381 |
"step": 19800,
|
382 |
"total_flos": 5215995096399360.0,
|
383 |
"train_loss": 0.7935866661746093,
|
384 |
-
"train_runtime":
|
385 |
-
"train_samples_per_second":
|
386 |
-
"train_steps_per_second": 9.
|
387 |
}
|
388 |
],
|
389 |
"max_steps": 19800,
|
|
|
23 |
"epoch": 1.0,
|
24 |
"eval_accuracy": 0.4232175648212433,
|
25 |
"eval_loss": 3.169081211090088,
|
26 |
+
"eval_runtime": 2.1415,
|
27 |
+
"eval_samples_per_second": 510.858,
|
28 |
+
"eval_steps_per_second": 63.974,
|
29 |
"step": 1320
|
30 |
},
|
31 |
{
|
|
|
50 |
"epoch": 2.0,
|
51 |
"eval_accuracy": 0.6435100436210632,
|
52 |
"eval_loss": 2.04946231842041,
|
53 |
+
"eval_runtime": 2.1238,
|
54 |
+
"eval_samples_per_second": 515.114,
|
55 |
+
"eval_steps_per_second": 64.507,
|
56 |
"step": 2640
|
57 |
},
|
58 |
{
|
|
|
71 |
"epoch": 3.0,
|
72 |
"eval_accuracy": 0.7861060500144958,
|
73 |
"eval_loss": 1.3566279411315918,
|
74 |
+
"eval_runtime": 2.0996,
|
75 |
+
"eval_samples_per_second": 521.061,
|
76 |
+
"eval_steps_per_second": 65.252,
|
77 |
"step": 3960
|
78 |
},
|
79 |
{
|
|
|
98 |
"epoch": 4.0,
|
99 |
"eval_accuracy": 0.8555758595466614,
|
100 |
"eval_loss": 0.9564884305000305,
|
101 |
+
"eval_runtime": 2.1078,
|
102 |
+
"eval_samples_per_second": 519.033,
|
103 |
+
"eval_steps_per_second": 64.998,
|
104 |
"step": 5280
|
105 |
},
|
106 |
{
|
|
|
125 |
"epoch": 5.0,
|
126 |
"eval_accuracy": 0.880255937576294,
|
127 |
"eval_loss": 0.7118942141532898,
|
128 |
+
"eval_runtime": 2.0744,
|
129 |
+
"eval_samples_per_second": 527.383,
|
130 |
+
"eval_steps_per_second": 66.043,
|
131 |
"step": 6600
|
132 |
},
|
133 |
{
|
|
|
146 |
"epoch": 6.0,
|
147 |
"eval_accuracy": 0.8912248611450195,
|
148 |
"eval_loss": 0.5804843902587891,
|
149 |
+
"eval_runtime": 2.0875,
|
150 |
+
"eval_samples_per_second": 524.077,
|
151 |
+
"eval_steps_per_second": 65.629,
|
152 |
"step": 7920
|
153 |
},
|
154 |
{
|
|
|
173 |
"epoch": 7.0,
|
174 |
"eval_accuracy": 0.8994515538215637,
|
175 |
"eval_loss": 0.5123510956764221,
|
176 |
+
"eval_runtime": 2.1263,
|
177 |
+
"eval_samples_per_second": 514.504,
|
178 |
+
"eval_steps_per_second": 64.431,
|
179 |
"step": 9240
|
180 |
},
|
181 |
{
|
|
|
200 |
"epoch": 8.0,
|
201 |
"eval_accuracy": 0.9021937847137451,
|
202 |
"eval_loss": 0.49525973200798035,
|
203 |
+
"eval_runtime": 2.0716,
|
204 |
+
"eval_samples_per_second": 528.088,
|
205 |
+
"eval_steps_per_second": 66.132,
|
206 |
"step": 10560
|
207 |
},
|
208 |
{
|
|
|
221 |
"epoch": 9.0,
|
222 |
"eval_accuracy": 0.9021937847137451,
|
223 |
"eval_loss": 0.486285001039505,
|
224 |
+
"eval_runtime": 2.0803,
|
225 |
+
"eval_samples_per_second": 525.893,
|
226 |
+
"eval_steps_per_second": 65.857,
|
227 |
"step": 11880
|
228 |
},
|
229 |
{
|
|
|
248 |
"epoch": 10.0,
|
249 |
"eval_accuracy": 0.9049360156059265,
|
250 |
"eval_loss": 0.48929038643836975,
|
251 |
+
"eval_runtime": 2.0752,
|
252 |
+
"eval_samples_per_second": 527.187,
|
253 |
+
"eval_steps_per_second": 66.019,
|
254 |
"step": 13200
|
255 |
},
|
256 |
{
|
|
|
275 |
"epoch": 11.0,
|
276 |
"eval_accuracy": 0.9076782464981079,
|
277 |
"eval_loss": 0.485994428396225,
|
278 |
+
"eval_runtime": 2.1075,
|
279 |
+
"eval_samples_per_second": 519.1,
|
280 |
+
"eval_steps_per_second": 65.006,
|
281 |
"step": 14520
|
282 |
},
|
283 |
{
|
|
|
296 |
"epoch": 12.0,
|
297 |
"eval_accuracy": 0.9058501124382019,
|
298 |
"eval_loss": 0.4888700246810913,
|
299 |
+
"eval_runtime": 2.0826,
|
300 |
+
"eval_samples_per_second": 525.296,
|
301 |
+
"eval_steps_per_second": 65.782,
|
302 |
"step": 15840
|
303 |
},
|
304 |
{
|
|
|
323 |
"epoch": 13.0,
|
324 |
"eval_accuracy": 0.9067641496658325,
|
325 |
"eval_loss": 0.4926171898841858,
|
326 |
+
"eval_runtime": 2.3107,
|
327 |
+
"eval_samples_per_second": 473.444,
|
328 |
+
"eval_steps_per_second": 59.289,
|
329 |
"step": 17160
|
330 |
},
|
331 |
{
|
|
|
344 |
"epoch": 14.0,
|
345 |
"eval_accuracy": 0.9067641496658325,
|
346 |
"eval_loss": 0.495064914226532,
|
347 |
+
"eval_runtime": 2.1748,
|
348 |
+
"eval_samples_per_second": 503.039,
|
349 |
+
"eval_steps_per_second": 62.995,
|
350 |
"step": 18480
|
351 |
},
|
352 |
{
|
|
|
371 |
"epoch": 15.0,
|
372 |
"eval_accuracy": 0.9067641496658325,
|
373 |
"eval_loss": 0.4963526129722595,
|
374 |
+
"eval_runtime": 2.0713,
|
375 |
+
"eval_samples_per_second": 528.178,
|
376 |
+
"eval_steps_per_second": 66.143,
|
377 |
"step": 19800
|
378 |
},
|
379 |
{
|
|
|
381 |
"step": 19800,
|
382 |
"total_flos": 5215995096399360.0,
|
383 |
"train_loss": 0.7935866661746093,
|
384 |
+
"train_runtime": 2196.4523,
|
385 |
+
"train_samples_per_second": 72.089,
|
386 |
+
"train_steps_per_second": 9.015
|
387 |
}
|
388 |
],
|
389 |
"max_steps": 19800,
|