Wanjiru commited on
Commit
b0acc73
1 Parent(s): e2cbfa7

Upload trainer_state.json

Browse files
Files changed (1) hide show
  1. trainer_state.json +562 -0
trainer_state.json ADDED
@@ -0,0 +1,562 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 7.0,
5
+ "global_step": 26299,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.27,
12
+ "learning_rate": 1.923951481044907e-05,
13
+ "loss": 0.0205,
14
+ "step": 1000
15
+ },
16
+ {
17
+ "epoch": 0.27,
18
+ "eval_EG_f1": 0.9960332310455804,
19
+ "eval_ET_f1": 0.9988771614641814,
20
+ "eval_TE_f1": 0.9952833720146739,
21
+ "eval_loss": 0.0021067976485937834,
22
+ "eval_overall_accuracy": 0.9995180550845189,
23
+ "eval_overall_f1": 0.9967312922623949,
24
+ "eval_overall_precision": 0.9966566866267466,
25
+ "eval_overall_recall": 0.9968059090682237,
26
+ "eval_runtime": 16.2281,
27
+ "eval_samples_per_second": 411.569,
28
+ "eval_steps_per_second": 25.758,
29
+ "step": 1000
30
+ },
31
+ {
32
+ "epoch": 0.53,
33
+ "learning_rate": 1.8479029620898135e-05,
34
+ "loss": 0.0027,
35
+ "step": 2000
36
+ },
37
+ {
38
+ "epoch": 0.53,
39
+ "eval_EG_f1": 0.9955841628620612,
40
+ "eval_ET_f1": 0.9992513849378649,
41
+ "eval_TE_f1": 0.995433789954338,
42
+ "eval_loss": 0.0024818568490445614,
43
+ "eval_overall_accuracy": 0.9993931064027275,
44
+ "eval_overall_f1": 0.9967563251659264,
45
+ "eval_overall_precision": 0.9966568534504266,
46
+ "eval_overall_recall": 0.9968558167390328,
47
+ "eval_runtime": 16.4446,
48
+ "eval_samples_per_second": 406.152,
49
+ "eval_steps_per_second": 25.419,
50
+ "step": 2000
51
+ },
52
+ {
53
+ "epoch": 0.8,
54
+ "learning_rate": 1.77185444313472e-05,
55
+ "loss": 0.0018,
56
+ "step": 3000
57
+ },
58
+ {
59
+ "epoch": 0.8,
60
+ "eval_EG_f1": 0.9987274496594056,
61
+ "eval_ET_f1": 0.999700553975146,
62
+ "eval_TE_f1": 0.9982783142450782,
63
+ "eval_loss": 0.0007141608512029052,
64
+ "eval_overall_accuracy": 0.9998393516948396,
65
+ "eval_overall_f1": 0.9989020860365306,
66
+ "eval_overall_precision": 0.9988522381356355,
67
+ "eval_overall_recall": 0.9989519389130109,
68
+ "eval_runtime": 16.3629,
69
+ "eval_samples_per_second": 408.181,
70
+ "eval_steps_per_second": 25.546,
71
+ "step": 3000
72
+ },
73
+ {
74
+ "epoch": 1.06,
75
+ "learning_rate": 1.695805924179627e-05,
76
+ "loss": 0.0015,
77
+ "step": 4000
78
+ },
79
+ {
80
+ "epoch": 1.06,
81
+ "eval_EG_f1": 0.9991765850737331,
82
+ "eval_ET_f1": 0.9992513849378649,
83
+ "eval_TE_f1": 0.9983530468633028,
84
+ "eval_loss": 0.0010974227916449308,
85
+ "eval_overall_accuracy": 0.9997858022597862,
86
+ "eval_overall_f1": 0.9989270118527761,
87
+ "eval_overall_precision": 0.9989020860365306,
88
+ "eval_overall_recall": 0.9989519389130109,
89
+ "eval_runtime": 16.7722,
90
+ "eval_samples_per_second": 398.219,
91
+ "eval_steps_per_second": 24.922,
92
+ "step": 4000
93
+ },
94
+ {
95
+ "epoch": 1.33,
96
+ "learning_rate": 1.6197574052245332e-05,
97
+ "loss": 0.0005,
98
+ "step": 5000
99
+ },
100
+ {
101
+ "epoch": 1.33,
102
+ "eval_EG_f1": 0.9982030548068285,
103
+ "eval_ET_f1": 0.999850276987573,
104
+ "eval_TE_f1": 0.9979038778260219,
105
+ "eval_loss": 0.0007138435612432659,
106
+ "eval_overall_accuracy": 0.9998125769773129,
107
+ "eval_overall_f1": 0.998652425633859,
108
+ "eval_overall_precision": 0.998702271025705,
109
+ "eval_overall_recall": 0.9986025852173479,
110
+ "eval_runtime": 16.5358,
111
+ "eval_samples_per_second": 403.912,
112
+ "eval_steps_per_second": 25.279,
113
+ "step": 5000
114
+ },
115
+ {
116
+ "epoch": 1.6,
117
+ "learning_rate": 1.54370888626944e-05,
118
+ "loss": 0.0008,
119
+ "step": 6000
120
+ },
121
+ {
122
+ "epoch": 1.6,
123
+ "eval_EG_f1": 0.9983530468633028,
124
+ "eval_ET_f1": 0.999550830962719,
125
+ "eval_TE_f1": 0.9979038778260219,
126
+ "eval_loss": 0.0009068112121894956,
127
+ "eval_overall_accuracy": 0.9997947271656284,
128
+ "eval_overall_f1": 0.9986025852173479,
129
+ "eval_overall_precision": 0.9986025852173479,
130
+ "eval_overall_recall": 0.9986025852173479,
131
+ "eval_runtime": 16.3597,
132
+ "eval_samples_per_second": 408.259,
133
+ "eval_steps_per_second": 25.551,
134
+ "step": 6000
135
+ },
136
+ {
137
+ "epoch": 1.86,
138
+ "learning_rate": 1.4676603673143466e-05,
139
+ "loss": 0.0009,
140
+ "step": 7000
141
+ },
142
+ {
143
+ "epoch": 1.86,
144
+ "eval_EG_f1": 0.9989519389130109,
145
+ "eval_ET_f1": 0.999850276987573,
146
+ "eval_TE_f1": 0.9988022159005839,
147
+ "eval_loss": 0.0010577181819826365,
148
+ "eval_overall_accuracy": 0.9998482766006819,
149
+ "eval_overall_f1": 0.999201477267056,
150
+ "eval_overall_precision": 0.999201477267056,
151
+ "eval_overall_recall": 0.999201477267056,
152
+ "eval_runtime": 16.3447,
153
+ "eval_samples_per_second": 408.634,
154
+ "eval_steps_per_second": 25.574,
155
+ "step": 7000
156
+ },
157
+ {
158
+ "epoch": 2.13,
159
+ "learning_rate": 1.3916118483592534e-05,
160
+ "loss": 0.0005,
161
+ "step": 8000
162
+ },
163
+ {
164
+ "epoch": 2.13,
165
+ "eval_EG_f1": 0.9991765850737331,
166
+ "eval_ET_f1": 0.999850276987573,
167
+ "eval_TE_f1": 0.9991765850737331,
168
+ "eval_loss": 0.0005780701176263392,
169
+ "eval_overall_accuracy": 0.9998839762240508,
170
+ "eval_overall_f1": 0.9994011378381076,
171
+ "eval_overall_precision": 0.9993512650331853,
172
+ "eval_overall_recall": 0.999451015621101,
173
+ "eval_runtime": 16.6712,
174
+ "eval_samples_per_second": 400.631,
175
+ "eval_steps_per_second": 25.073,
176
+ "step": 8000
177
+ },
178
+ {
179
+ "epoch": 2.4,
180
+ "learning_rate": 1.31556332940416e-05,
181
+ "loss": 0.0002,
182
+ "step": 9000
183
+ },
184
+ {
185
+ "epoch": 2.4,
186
+ "eval_EG_f1": 0.9988771614641814,
187
+ "eval_ET_f1": 0.999850276987573,
188
+ "eval_TE_f1": 0.9985777378546299,
189
+ "eval_loss": 0.0005696099251508713,
190
+ "eval_overall_accuracy": 0.999892901129893,
191
+ "eval_overall_f1": 0.9991017067571614,
192
+ "eval_overall_precision": 0.9990518488946554,
193
+ "eval_overall_recall": 0.999151569596247,
194
+ "eval_runtime": 16.4448,
195
+ "eval_samples_per_second": 406.147,
196
+ "eval_steps_per_second": 25.418,
197
+ "step": 9000
198
+ },
199
+ {
200
+ "epoch": 2.66,
201
+ "learning_rate": 1.2395148104490666e-05,
202
+ "loss": 0.0005,
203
+ "step": 10000
204
+ },
205
+ {
206
+ "epoch": 2.66,
207
+ "eval_EG_f1": 0.9989519389130109,
208
+ "eval_ET_f1": 0.999850276987573,
209
+ "eval_TE_f1": 0.9988771614641814,
210
+ "eval_loss": 0.0004689108463935554,
211
+ "eval_overall_accuracy": 0.999892901129893,
212
+ "eval_overall_f1": 0.9992264504054896,
213
+ "eval_overall_precision": 0.9992015171174768,
214
+ "eval_overall_recall": 0.9992513849378649,
215
+ "eval_runtime": 16.3326,
216
+ "eval_samples_per_second": 408.938,
217
+ "eval_steps_per_second": 25.593,
218
+ "step": 10000
219
+ },
220
+ {
221
+ "epoch": 2.93,
222
+ "learning_rate": 1.1634662914939731e-05,
223
+ "loss": 0.0002,
224
+ "step": 11000
225
+ },
226
+ {
227
+ "epoch": 2.93,
228
+ "eval_EG_f1": 0.9988771614641814,
229
+ "eval_ET_f1": 1.0,
230
+ "eval_TE_f1": 0.9990268732689572,
231
+ "eval_loss": 0.0005850127199664712,
232
+ "eval_overall_accuracy": 0.9999286007532621,
233
+ "eval_overall_f1": 0.9993013274777922,
234
+ "eval_overall_precision": 0.9992514596536753,
235
+ "eval_overall_recall": 0.999351200279483,
236
+ "eval_runtime": 16.4437,
237
+ "eval_samples_per_second": 406.175,
238
+ "eval_steps_per_second": 25.42,
239
+ "step": 11000
240
+ },
241
+ {
242
+ "epoch": 3.19,
243
+ "learning_rate": 1.08741777253888e-05,
244
+ "loss": 0.0008,
245
+ "step": 12000
246
+ },
247
+ {
248
+ "epoch": 3.19,
249
+ "eval_EG_f1": 0.9992513849378649,
250
+ "eval_ET_f1": 1.0,
251
+ "eval_TE_f1": 0.9992513849378649,
252
+ "eval_loss": 0.0004198316019028425,
253
+ "eval_overall_accuracy": 0.9999464505649466,
254
+ "eval_overall_f1": 0.99950092329191,
255
+ "eval_overall_precision": 0.99950092329191,
256
+ "eval_overall_recall": 0.99950092329191,
257
+ "eval_runtime": 16.4892,
258
+ "eval_samples_per_second": 405.052,
259
+ "eval_steps_per_second": 25.35,
260
+ "step": 12000
261
+ },
262
+ {
263
+ "epoch": 3.46,
264
+ "learning_rate": 1.0113692535837865e-05,
265
+ "loss": 0.0002,
266
+ "step": 13000
267
+ },
268
+ {
269
+ "epoch": 3.46,
270
+ "eval_EG_f1": 0.9991016619254379,
271
+ "eval_ET_f1": 0.999700553975146,
272
+ "eval_TE_f1": 0.9989519389130109,
273
+ "eval_loss": 0.0011158857960253954,
274
+ "eval_overall_accuracy": 0.9998482766006819,
275
+ "eval_overall_f1": 0.9992513849378649,
276
+ "eval_overall_precision": 0.9992513849378649,
277
+ "eval_overall_recall": 0.9992513849378649,
278
+ "eval_runtime": 16.7537,
279
+ "eval_samples_per_second": 398.659,
280
+ "eval_steps_per_second": 24.95,
281
+ "step": 13000
282
+ },
283
+ {
284
+ "epoch": 3.73,
285
+ "learning_rate": 9.353207346286931e-06,
286
+ "loss": 0.0003,
287
+ "step": 14000
288
+ },
289
+ {
290
+ "epoch": 3.73,
291
+ "eval_EG_f1": 0.9988022159005839,
292
+ "eval_ET_f1": 0.999850276987573,
293
+ "eval_TE_f1": 0.9986524928881569,
294
+ "eval_loss": 0.0010174426715821028,
295
+ "eval_overall_accuracy": 0.9998393516948396,
296
+ "eval_overall_f1": 0.9991016619254379,
297
+ "eval_overall_precision": 0.9991016619254379,
298
+ "eval_overall_recall": 0.9991016619254379,
299
+ "eval_runtime": 16.5225,
300
+ "eval_samples_per_second": 404.237,
301
+ "eval_steps_per_second": 25.299,
302
+ "step": 14000
303
+ },
304
+ {
305
+ "epoch": 3.99,
306
+ "learning_rate": 8.592722156735998e-06,
307
+ "loss": 0.0,
308
+ "step": 15000
309
+ },
310
+ {
311
+ "epoch": 3.99,
312
+ "eval_EG_f1": 0.9992513849378649,
313
+ "eval_ET_f1": 1.0,
314
+ "eval_TE_f1": 0.9992513849378649,
315
+ "eval_loss": 0.001382750691846013,
316
+ "eval_overall_accuracy": 0.9999018260357353,
317
+ "eval_overall_f1": 0.99950092329191,
318
+ "eval_overall_precision": 0.99950092329191,
319
+ "eval_overall_recall": 0.99950092329191,
320
+ "eval_runtime": 16.3741,
321
+ "eval_samples_per_second": 407.899,
322
+ "eval_steps_per_second": 25.528,
323
+ "step": 15000
324
+ },
325
+ {
326
+ "epoch": 4.26,
327
+ "learning_rate": 7.832236967185065e-06,
328
+ "loss": 0.0002,
329
+ "step": 16000
330
+ },
331
+ {
332
+ "epoch": 4.26,
333
+ "eval_EG_f1": 0.9990268732689572,
334
+ "eval_ET_f1": 0.9997754322928364,
335
+ "eval_TE_f1": 0.9991016619254379,
336
+ "eval_loss": 0.0008425627020187676,
337
+ "eval_overall_accuracy": 0.9998839762240508,
338
+ "eval_overall_f1": 0.9993013274777922,
339
+ "eval_overall_precision": 0.9992514596536753,
340
+ "eval_overall_recall": 0.999351200279483,
341
+ "eval_runtime": 16.4088,
342
+ "eval_samples_per_second": 407.037,
343
+ "eval_steps_per_second": 25.474,
344
+ "step": 16000
345
+ },
346
+ {
347
+ "epoch": 4.52,
348
+ "learning_rate": 7.071751777634132e-06,
349
+ "loss": 0.0001,
350
+ "step": 17000
351
+ },
352
+ {
353
+ "epoch": 4.52,
354
+ "eval_EG_f1": 0.9994011079502919,
355
+ "eval_ET_f1": 0.999700553975146,
356
+ "eval_TE_f1": 0.9992513849378649,
357
+ "eval_loss": 0.000884207256603986,
358
+ "eval_overall_accuracy": 0.9999196758474198,
359
+ "eval_overall_f1": 0.999451015621101,
360
+ "eval_overall_precision": 0.999451015621101,
361
+ "eval_overall_recall": 0.999451015621101,
362
+ "eval_runtime": 16.6905,
363
+ "eval_samples_per_second": 400.169,
364
+ "eval_steps_per_second": 25.044,
365
+ "step": 17000
366
+ },
367
+ {
368
+ "epoch": 4.79,
369
+ "learning_rate": 6.311266588083198e-06,
370
+ "loss": 0.0005,
371
+ "step": 18000
372
+ },
373
+ {
374
+ "epoch": 4.79,
375
+ "eval_EG_f1": 0.9992513849378649,
376
+ "eval_ET_f1": 1.0,
377
+ "eval_TE_f1": 0.9992513849378649,
378
+ "eval_loss": 0.0005574871320277452,
379
+ "eval_overall_accuracy": 0.9999375256591043,
380
+ "eval_overall_f1": 0.99950092329191,
381
+ "eval_overall_precision": 0.99950092329191,
382
+ "eval_overall_recall": 0.99950092329191,
383
+ "eval_runtime": 16.5015,
384
+ "eval_samples_per_second": 404.752,
385
+ "eval_steps_per_second": 25.331,
386
+ "step": 18000
387
+ },
388
+ {
389
+ "epoch": 5.06,
390
+ "learning_rate": 5.5507813985322644e-06,
391
+ "loss": 0.0,
392
+ "step": 19000
393
+ },
394
+ {
395
+ "epoch": 5.06,
396
+ "eval_EG_f1": 0.9989519389130109,
397
+ "eval_ET_f1": 1.0,
398
+ "eval_TE_f1": 0.9989519389130109,
399
+ "eval_loss": 0.0005674651474691927,
400
+ "eval_overall_accuracy": 0.9999018260357353,
401
+ "eval_overall_f1": 0.999301292608674,
402
+ "eval_overall_precision": 0.999301292608674,
403
+ "eval_overall_recall": 0.999301292608674,
404
+ "eval_runtime": 16.4647,
405
+ "eval_samples_per_second": 405.657,
406
+ "eval_steps_per_second": 25.388,
407
+ "step": 19000
408
+ },
409
+ {
410
+ "epoch": 5.32,
411
+ "learning_rate": 4.79029620898133e-06,
412
+ "loss": 0.0001,
413
+ "step": 20000
414
+ },
415
+ {
416
+ "epoch": 5.32,
417
+ "eval_EG_f1": 0.9992513849378649,
418
+ "eval_ET_f1": 1.0,
419
+ "eval_TE_f1": 0.9992513849378649,
420
+ "eval_loss": 0.0006091786199249327,
421
+ "eval_overall_accuracy": 0.9999375256591043,
422
+ "eval_overall_f1": 0.99950092329191,
423
+ "eval_overall_precision": 0.99950092329191,
424
+ "eval_overall_recall": 0.99950092329191,
425
+ "eval_runtime": 16.4349,
426
+ "eval_samples_per_second": 406.391,
427
+ "eval_steps_per_second": 25.434,
428
+ "step": 20000
429
+ },
430
+ {
431
+ "epoch": 5.59,
432
+ "learning_rate": 4.029811019430397e-06,
433
+ "loss": 0.0,
434
+ "step": 21000
435
+ },
436
+ {
437
+ "epoch": 5.59,
438
+ "eval_EG_f1": 0.9994011079502919,
439
+ "eval_ET_f1": 1.0,
440
+ "eval_TE_f1": 0.9994011079502919,
441
+ "eval_loss": 0.0006266526179388165,
442
+ "eval_overall_accuracy": 0.9999553754707888,
443
+ "eval_overall_f1": 0.9996007386335279,
444
+ "eval_overall_precision": 0.9996007386335279,
445
+ "eval_overall_recall": 0.9996007386335279,
446
+ "eval_runtime": 16.6877,
447
+ "eval_samples_per_second": 400.236,
448
+ "eval_steps_per_second": 25.048,
449
+ "step": 21000
450
+ },
451
+ {
452
+ "epoch": 5.86,
453
+ "learning_rate": 3.2693258298794635e-06,
454
+ "loss": 0.0,
455
+ "step": 22000
456
+ },
457
+ {
458
+ "epoch": 5.86,
459
+ "eval_EG_f1": 0.9992513849378649,
460
+ "eval_ET_f1": 1.0,
461
+ "eval_TE_f1": 0.9992513849378649,
462
+ "eval_loss": 0.0006635423633269966,
463
+ "eval_overall_accuracy": 0.9999375256591043,
464
+ "eval_overall_f1": 0.99950092329191,
465
+ "eval_overall_precision": 0.99950092329191,
466
+ "eval_overall_recall": 0.99950092329191,
467
+ "eval_runtime": 16.716,
468
+ "eval_samples_per_second": 399.557,
469
+ "eval_steps_per_second": 25.006,
470
+ "step": 22000
471
+ },
472
+ {
473
+ "epoch": 6.12,
474
+ "learning_rate": 2.50884064032853e-06,
475
+ "loss": 0.0,
476
+ "step": 23000
477
+ },
478
+ {
479
+ "epoch": 6.12,
480
+ "eval_EG_f1": 0.9994011079502919,
481
+ "eval_ET_f1": 1.0,
482
+ "eval_TE_f1": 0.9994011079502919,
483
+ "eval_loss": 0.0006512191030196846,
484
+ "eval_overall_accuracy": 0.9999464505649466,
485
+ "eval_overall_f1": 0.9996007386335279,
486
+ "eval_overall_precision": 0.9996007386335279,
487
+ "eval_overall_recall": 0.9996007386335279,
488
+ "eval_runtime": 16.3818,
489
+ "eval_samples_per_second": 407.709,
490
+ "eval_steps_per_second": 25.516,
491
+ "step": 23000
492
+ },
493
+ {
494
+ "epoch": 6.39,
495
+ "learning_rate": 1.7483554507775962e-06,
496
+ "loss": 0.0,
497
+ "step": 24000
498
+ },
499
+ {
500
+ "epoch": 6.39,
501
+ "eval_EG_f1": 0.9994011079502919,
502
+ "eval_ET_f1": 1.0,
503
+ "eval_TE_f1": 0.9994011079502919,
504
+ "eval_loss": 0.000729710329324007,
505
+ "eval_overall_accuracy": 0.9999464505649466,
506
+ "eval_overall_f1": 0.9996007386335279,
507
+ "eval_overall_precision": 0.9996007386335279,
508
+ "eval_overall_recall": 0.9996007386335279,
509
+ "eval_runtime": 16.5641,
510
+ "eval_samples_per_second": 403.222,
511
+ "eval_steps_per_second": 25.235,
512
+ "step": 24000
513
+ },
514
+ {
515
+ "epoch": 6.65,
516
+ "learning_rate": 9.878702612266627e-07,
517
+ "loss": 0.0001,
518
+ "step": 25000
519
+ },
520
+ {
521
+ "epoch": 6.65,
522
+ "eval_EG_f1": 0.9994011079502919,
523
+ "eval_ET_f1": 1.0,
524
+ "eval_TE_f1": 0.9994011079502919,
525
+ "eval_loss": 0.0005905419238843024,
526
+ "eval_overall_accuracy": 0.9999464505649466,
527
+ "eval_overall_f1": 0.9996007386335279,
528
+ "eval_overall_precision": 0.9996007386335279,
529
+ "eval_overall_recall": 0.9996007386335279,
530
+ "eval_runtime": 16.5058,
531
+ "eval_samples_per_second": 404.647,
532
+ "eval_steps_per_second": 25.325,
533
+ "step": 25000
534
+ },
535
+ {
536
+ "epoch": 6.92,
537
+ "learning_rate": 2.2738507167572915e-07,
538
+ "loss": 0.0001,
539
+ "step": 26000
540
+ },
541
+ {
542
+ "epoch": 6.92,
543
+ "eval_EG_f1": 0.9994011079502919,
544
+ "eval_ET_f1": 1.0,
545
+ "eval_TE_f1": 0.9994011079502919,
546
+ "eval_loss": 0.0006466144695878029,
547
+ "eval_overall_accuracy": 0.9999464505649466,
548
+ "eval_overall_f1": 0.9996007386335279,
549
+ "eval_overall_precision": 0.9996007386335279,
550
+ "eval_overall_recall": 0.9996007386335279,
551
+ "eval_runtime": 16.8702,
552
+ "eval_samples_per_second": 395.906,
553
+ "eval_steps_per_second": 24.777,
554
+ "step": 26000
555
+ }
556
+ ],
557
+ "max_steps": 26299,
558
+ "num_train_epochs": 7,
559
+ "total_flos": 6405072201643752.0,
560
+ "trial_name": null,
561
+ "trial_params": null
562
+ }