brettbbb commited on
Commit
2e2eaef
·
1 Parent(s): a280fe0

End of training

Browse files
adapter_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6bc79e05710a49c3381452820af8fc475b972524c83851f436496ef0087ee919
3
  size 160069834
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ba94bf30155c37733b627211546bc94a54c9da0b73ed8fab258ac7989fda5aff
3
  size 160069834
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f04894a14f5d2e8def3f17390319a8e7f0c99fafe83ffda0bc63ac9283ad21da
3
  size 159967880
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:61c184f2c3d0fda7df75b1ca99e3129df8ce7480b6f80c862a15b0f23e32da6c
3
  size 159967880
all_results.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "epoch": 20.0,
3
- "train_loss": 0.31634302906750233,
4
- "train_runtime": 1044.5949,
5
- "train_samples_per_second": 2.451,
6
- "train_steps_per_second": 0.613
7
  }
 
1
  {
2
  "epoch": 20.0,
3
+ "train_loss": 0.2799676021706546,
4
+ "train_runtime": 1494.9511,
5
+ "train_samples_per_second": 1.712,
6
+ "train_steps_per_second": 0.428
7
  }
train_results.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "epoch": 20.0,
3
- "train_loss": 0.31634302906750233,
4
- "train_runtime": 1044.5949,
5
- "train_samples_per_second": 2.451,
6
- "train_steps_per_second": 0.613
7
  }
 
1
  {
2
  "epoch": 20.0,
3
+ "train_loss": 0.2799676021706546,
4
+ "train_runtime": 1494.9511,
5
+ "train_samples_per_second": 1.712,
6
+ "train_steps_per_second": 0.428
7
  }
trainer_state.json CHANGED
@@ -11,1243 +11,1243 @@
11
  {
12
  "epoch": 0.03,
13
  "learning_rate": 2e-05,
14
- "loss": 3.2435,
15
  "step": 1
16
  },
17
  {
18
  "epoch": 0.06,
19
  "learning_rate": 4e-05,
20
- "loss": 2.5079,
21
  "step": 2
22
  },
23
  {
24
  "epoch": 0.09,
25
  "learning_rate": 6e-05,
26
- "loss": 2.8812,
27
  "step": 3
28
  },
29
  {
30
  "epoch": 0.12,
31
  "learning_rate": 8e-05,
32
- "loss": 2.4454,
33
  "step": 4
34
  },
35
  {
36
  "epoch": 0.16,
37
  "learning_rate": 0.0001,
38
- "loss": 3.1388,
39
  "step": 5
40
  },
41
  {
42
  "epoch": 0.19,
43
  "learning_rate": 9.984251968503937e-05,
44
- "loss": 2.5071,
45
  "step": 6
46
  },
47
  {
48
  "epoch": 0.22,
49
  "learning_rate": 9.968503937007875e-05,
50
- "loss": 2.3453,
51
  "step": 7
52
  },
53
  {
54
  "epoch": 0.25,
55
  "learning_rate": 9.952755905511811e-05,
56
- "loss": 2.7086,
57
  "step": 8
58
  },
59
  {
60
  "epoch": 0.28,
61
  "learning_rate": 9.937007874015748e-05,
62
- "loss": 2.4661,
63
  "step": 9
64
  },
65
  {
66
  "epoch": 0.31,
67
  "learning_rate": 9.921259842519686e-05,
68
- "loss": 2.3876,
69
  "step": 10
70
  },
71
  {
72
  "epoch": 0.34,
73
  "learning_rate": 9.905511811023622e-05,
74
- "loss": 2.0787,
75
  "step": 11
76
  },
77
  {
78
  "epoch": 0.38,
79
  "learning_rate": 9.88976377952756e-05,
80
- "loss": 2.2623,
81
  "step": 12
82
  },
83
  {
84
  "epoch": 0.41,
85
  "learning_rate": 9.874015748031497e-05,
86
- "loss": 1.9086,
87
  "step": 13
88
  },
89
  {
90
  "epoch": 0.44,
91
- "learning_rate": 9.858267716535433e-05,
92
- "loss": 1.842,
93
  "step": 14
94
  },
95
  {
96
  "epoch": 0.47,
97
- "learning_rate": 9.842519685039371e-05,
98
- "loss": 1.9555,
99
  "step": 15
100
  },
101
  {
102
  "epoch": 0.5,
103
- "learning_rate": 9.826771653543308e-05,
104
- "loss": 1.6565,
105
  "step": 16
106
  },
107
  {
108
  "epoch": 0.53,
109
- "learning_rate": 9.811023622047244e-05,
110
- "loss": 1.537,
111
  "step": 17
112
  },
113
  {
114
  "epoch": 0.56,
115
- "learning_rate": 9.795275590551182e-05,
116
- "loss": 1.7247,
117
  "step": 18
118
  },
119
  {
120
  "epoch": 0.59,
121
- "learning_rate": 9.779527559055119e-05,
122
- "loss": 1.7376,
123
  "step": 19
124
  },
125
  {
126
  "epoch": 0.62,
127
  "learning_rate": 9.779527559055119e-05,
128
- "loss": 1.9343,
129
  "step": 20
130
  },
131
  {
132
  "epoch": 0.66,
133
  "learning_rate": 9.763779527559055e-05,
134
- "loss": 1.6668,
135
  "step": 21
136
  },
137
  {
138
  "epoch": 0.69,
139
  "learning_rate": 9.748031496062993e-05,
140
- "loss": 1.6601,
141
  "step": 22
142
  },
143
  {
144
  "epoch": 0.72,
145
  "learning_rate": 9.73228346456693e-05,
146
- "loss": 1.8379,
147
  "step": 23
148
  },
149
  {
150
  "epoch": 0.75,
151
  "learning_rate": 9.716535433070866e-05,
152
- "loss": 1.6798,
153
  "step": 24
154
  },
155
  {
156
  "epoch": 0.78,
157
  "learning_rate": 9.700787401574803e-05,
158
- "loss": 1.6726,
159
  "step": 25
160
  },
161
  {
162
  "epoch": 0.81,
163
  "learning_rate": 9.68503937007874e-05,
164
- "loss": 1.3375,
165
  "step": 26
166
  },
167
  {
168
  "epoch": 0.84,
169
  "learning_rate": 9.669291338582677e-05,
170
- "loss": 1.7586,
171
  "step": 27
172
  },
173
  {
174
  "epoch": 0.88,
175
  "learning_rate": 9.653543307086614e-05,
176
- "loss": 1.871,
177
  "step": 28
178
  },
179
  {
180
  "epoch": 0.91,
181
  "learning_rate": 9.637795275590552e-05,
182
- "loss": 1.35,
183
  "step": 29
184
  },
185
  {
186
  "epoch": 0.94,
187
  "learning_rate": 9.622047244094488e-05,
188
- "loss": 1.7885,
189
  "step": 30
190
  },
191
  {
192
  "epoch": 0.97,
193
  "learning_rate": 9.606299212598425e-05,
194
- "loss": 1.6137,
195
  "step": 31
196
  },
197
  {
198
  "epoch": 1.0,
199
  "learning_rate": 9.590551181102363e-05,
200
- "loss": 1.645,
201
  "step": 32
202
  },
203
  {
204
  "epoch": 1.03,
205
  "learning_rate": 9.574803149606299e-05,
206
- "loss": 1.4522,
207
  "step": 33
208
  },
209
  {
210
  "epoch": 1.06,
211
  "learning_rate": 9.559055118110236e-05,
212
- "loss": 1.5255,
213
  "step": 34
214
  },
215
  {
216
  "epoch": 1.09,
217
  "learning_rate": 9.543307086614174e-05,
218
- "loss": 1.4846,
219
  "step": 35
220
  },
221
  {
222
  "epoch": 1.12,
223
  "learning_rate": 9.52755905511811e-05,
224
- "loss": 1.4279,
225
  "step": 36
226
  },
227
  {
228
  "epoch": 1.16,
229
  "learning_rate": 9.511811023622048e-05,
230
- "loss": 1.2765,
231
  "step": 37
232
  },
233
  {
234
  "epoch": 1.19,
235
  "learning_rate": 9.496062992125985e-05,
236
- "loss": 1.3945,
237
  "step": 38
238
  },
239
  {
240
  "epoch": 1.22,
241
  "learning_rate": 9.480314960629921e-05,
242
- "loss": 1.3376,
243
  "step": 39
244
  },
245
  {
246
  "epoch": 1.25,
247
  "learning_rate": 9.464566929133859e-05,
248
- "loss": 1.3207,
249
  "step": 40
250
  },
251
  {
252
  "epoch": 1.28,
253
  "learning_rate": 9.448818897637796e-05,
254
- "loss": 1.2531,
255
  "step": 41
256
  },
257
  {
258
  "epoch": 1.31,
259
  "learning_rate": 9.433070866141732e-05,
260
- "loss": 1.3067,
261
  "step": 42
262
  },
263
  {
264
  "epoch": 1.34,
265
  "learning_rate": 9.41732283464567e-05,
266
- "loss": 1.3033,
267
  "step": 43
268
  },
269
  {
270
  "epoch": 1.38,
271
  "learning_rate": 9.401574803149607e-05,
272
- "loss": 1.3832,
273
  "step": 44
274
  },
275
  {
276
  "epoch": 1.41,
277
  "learning_rate": 9.385826771653545e-05,
278
- "loss": 1.3039,
279
  "step": 45
280
  },
281
  {
282
  "epoch": 1.44,
283
  "learning_rate": 9.370078740157481e-05,
284
- "loss": 1.3668,
285
  "step": 46
286
  },
287
  {
288
  "epoch": 1.47,
289
  "learning_rate": 9.354330708661418e-05,
290
- "loss": 1.5877,
291
  "step": 47
292
  },
293
  {
294
  "epoch": 1.5,
295
  "learning_rate": 9.338582677165355e-05,
296
- "loss": 1.3632,
297
  "step": 48
298
  },
299
  {
300
  "epoch": 1.53,
301
  "learning_rate": 9.322834645669292e-05,
302
- "loss": 1.3005,
303
  "step": 49
304
  },
305
  {
306
  "epoch": 1.56,
307
  "learning_rate": 9.307086614173229e-05,
308
- "loss": 1.3815,
309
  "step": 50
310
  },
311
  {
312
  "epoch": 1.59,
313
  "learning_rate": 9.291338582677166e-05,
314
- "loss": 1.2945,
315
  "step": 51
316
  },
317
  {
318
  "epoch": 1.62,
319
  "learning_rate": 9.275590551181103e-05,
320
- "loss": 1.534,
321
  "step": 52
322
  },
323
  {
324
  "epoch": 1.66,
325
  "learning_rate": 9.259842519685041e-05,
326
- "loss": 1.4206,
327
  "step": 53
328
  },
329
  {
330
  "epoch": 1.69,
331
  "learning_rate": 9.244094488188977e-05,
332
- "loss": 1.1488,
333
  "step": 54
334
  },
335
  {
336
  "epoch": 1.72,
337
  "learning_rate": 9.228346456692914e-05,
338
- "loss": 1.3944,
339
  "step": 55
340
  },
341
  {
342
  "epoch": 1.75,
343
  "learning_rate": 9.21259842519685e-05,
344
- "loss": 1.643,
345
  "step": 56
346
  },
347
  {
348
  "epoch": 1.78,
349
  "learning_rate": 9.196850393700787e-05,
350
- "loss": 1.5561,
351
  "step": 57
352
  },
353
  {
354
  "epoch": 1.81,
355
  "learning_rate": 9.181102362204725e-05,
356
- "loss": 1.2806,
357
  "step": 58
358
  },
359
  {
360
  "epoch": 1.84,
361
  "learning_rate": 9.165354330708661e-05,
362
- "loss": 1.2347,
363
  "step": 59
364
  },
365
  {
366
  "epoch": 1.88,
367
  "learning_rate": 9.149606299212598e-05,
368
- "loss": 1.5723,
369
  "step": 60
370
  },
371
  {
372
  "epoch": 1.91,
373
  "learning_rate": 9.133858267716536e-05,
374
- "loss": 1.4924,
375
  "step": 61
376
  },
377
  {
378
  "epoch": 1.94,
379
  "learning_rate": 9.118110236220472e-05,
380
- "loss": 1.4557,
381
  "step": 62
382
  },
383
  {
384
  "epoch": 1.97,
385
  "learning_rate": 9.102362204724409e-05,
386
- "loss": 1.2131,
387
  "step": 63
388
  },
389
  {
390
  "epoch": 2.0,
391
  "learning_rate": 9.086614173228347e-05,
392
- "loss": 1.4723,
393
  "step": 64
394
  },
395
  {
396
  "epoch": 2.03,
397
  "learning_rate": 9.070866141732283e-05,
398
- "loss": 1.0617,
399
  "step": 65
400
  },
401
  {
402
  "epoch": 2.06,
403
  "learning_rate": 9.05511811023622e-05,
404
- "loss": 1.0665,
405
  "step": 66
406
  },
407
  {
408
  "epoch": 2.09,
409
  "learning_rate": 9.039370078740158e-05,
410
- "loss": 1.0078,
411
  "step": 67
412
  },
413
  {
414
  "epoch": 2.12,
415
  "learning_rate": 9.023622047244094e-05,
416
- "loss": 1.0409,
417
  "step": 68
418
  },
419
  {
420
  "epoch": 2.16,
421
  "learning_rate": 9.007874015748032e-05,
422
- "loss": 1.23,
423
  "step": 69
424
  },
425
  {
426
  "epoch": 2.19,
427
  "learning_rate": 8.992125984251969e-05,
428
- "loss": 1.1087,
429
  "step": 70
430
  },
431
  {
432
  "epoch": 2.22,
433
  "learning_rate": 8.976377952755905e-05,
434
- "loss": 0.7524,
435
  "step": 71
436
  },
437
  {
438
  "epoch": 2.25,
439
  "learning_rate": 8.960629921259843e-05,
440
- "loss": 0.8123,
441
  "step": 72
442
  },
443
  {
444
  "epoch": 2.28,
445
  "learning_rate": 8.94488188976378e-05,
446
- "loss": 1.0234,
447
  "step": 73
448
  },
449
  {
450
  "epoch": 2.31,
451
  "learning_rate": 8.929133858267716e-05,
452
- "loss": 1.1632,
453
  "step": 74
454
  },
455
  {
456
  "epoch": 2.34,
457
  "learning_rate": 8.913385826771654e-05,
458
- "loss": 1.0603,
459
  "step": 75
460
  },
461
  {
462
  "epoch": 2.38,
463
  "learning_rate": 8.897637795275591e-05,
464
- "loss": 1.3306,
465
  "step": 76
466
  },
467
  {
468
  "epoch": 2.41,
469
  "learning_rate": 8.881889763779529e-05,
470
- "loss": 0.8474,
471
  "step": 77
472
  },
473
  {
474
  "epoch": 2.44,
475
  "learning_rate": 8.866141732283465e-05,
476
- "loss": 0.9182,
477
  "step": 78
478
  },
479
  {
480
  "epoch": 2.47,
481
  "learning_rate": 8.850393700787402e-05,
482
- "loss": 0.9203,
483
  "step": 79
484
  },
485
  {
486
  "epoch": 2.5,
487
  "learning_rate": 8.83464566929134e-05,
488
- "loss": 0.979,
489
  "step": 80
490
  },
491
  {
492
  "epoch": 2.53,
493
  "learning_rate": 8.818897637795276e-05,
494
- "loss": 0.9674,
495
  "step": 81
496
  },
497
  {
498
  "epoch": 2.56,
499
  "learning_rate": 8.803149606299213e-05,
500
- "loss": 1.0255,
501
  "step": 82
502
  },
503
  {
504
  "epoch": 2.59,
505
  "learning_rate": 8.78740157480315e-05,
506
- "loss": 0.8677,
507
  "step": 83
508
  },
509
  {
510
  "epoch": 2.62,
511
  "learning_rate": 8.771653543307087e-05,
512
- "loss": 0.9527,
513
  "step": 84
514
  },
515
  {
516
  "epoch": 2.66,
517
  "learning_rate": 8.755905511811025e-05,
518
- "loss": 0.8919,
519
  "step": 85
520
  },
521
  {
522
  "epoch": 2.69,
523
  "learning_rate": 8.740157480314962e-05,
524
- "loss": 0.8617,
525
  "step": 86
526
  },
527
  {
528
  "epoch": 2.72,
529
  "learning_rate": 8.724409448818898e-05,
530
- "loss": 1.1484,
531
  "step": 87
532
  },
533
  {
534
  "epoch": 2.75,
535
  "learning_rate": 8.708661417322835e-05,
536
- "loss": 0.8079,
537
  "step": 88
538
  },
539
  {
540
  "epoch": 2.78,
541
  "learning_rate": 8.692913385826773e-05,
542
- "loss": 1.023,
543
  "step": 89
544
  },
545
  {
546
  "epoch": 2.81,
547
  "learning_rate": 8.677165354330709e-05,
548
- "loss": 0.9347,
549
  "step": 90
550
  },
551
  {
552
  "epoch": 2.84,
553
  "learning_rate": 8.661417322834646e-05,
554
- "loss": 0.9897,
555
  "step": 91
556
  },
557
  {
558
  "epoch": 2.88,
559
  "learning_rate": 8.645669291338582e-05,
560
- "loss": 0.8099,
561
  "step": 92
562
  },
563
  {
564
  "epoch": 2.91,
565
  "learning_rate": 8.62992125984252e-05,
566
- "loss": 0.7869,
567
  "step": 93
568
  },
569
  {
570
  "epoch": 2.94,
571
  "learning_rate": 8.614173228346457e-05,
572
- "loss": 1.082,
573
  "step": 94
574
  },
575
  {
576
  "epoch": 2.97,
577
  "learning_rate": 8.598425196850393e-05,
578
- "loss": 0.9262,
579
  "step": 95
580
  },
581
  {
582
  "epoch": 3.0,
583
  "learning_rate": 8.582677165354331e-05,
584
- "loss": 0.9898,
585
  "step": 96
586
  },
587
  {
588
  "epoch": 3.03,
589
  "learning_rate": 8.566929133858268e-05,
590
- "loss": 0.5495,
591
  "step": 97
592
  },
593
  {
594
  "epoch": 3.06,
595
  "learning_rate": 8.551181102362204e-05,
596
- "loss": 0.5758,
597
  "step": 98
598
  },
599
  {
600
  "epoch": 3.09,
601
  "learning_rate": 8.535433070866142e-05,
602
- "loss": 0.4991,
603
  "step": 99
604
  },
605
  {
606
  "epoch": 3.12,
607
  "learning_rate": 8.519685039370079e-05,
608
- "loss": 0.7096,
609
  "step": 100
610
  },
611
  {
612
  "epoch": 3.16,
613
  "learning_rate": 8.503937007874016e-05,
614
- "loss": 0.4769,
615
  "step": 101
616
  },
617
  {
618
  "epoch": 3.19,
619
  "learning_rate": 8.488188976377953e-05,
620
- "loss": 0.4177,
621
  "step": 102
622
  },
623
  {
624
  "epoch": 3.22,
625
  "learning_rate": 8.47244094488189e-05,
626
- "loss": 0.5507,
627
  "step": 103
628
  },
629
  {
630
  "epoch": 3.25,
631
  "learning_rate": 8.456692913385827e-05,
632
- "loss": 0.6499,
633
  "step": 104
634
  },
635
  {
636
  "epoch": 3.28,
637
  "learning_rate": 8.440944881889764e-05,
638
- "loss": 0.3373,
639
  "step": 105
640
  },
641
  {
642
  "epoch": 3.31,
643
  "learning_rate": 8.4251968503937e-05,
644
- "loss": 0.4854,
645
  "step": 106
646
  },
647
  {
648
  "epoch": 3.34,
649
  "learning_rate": 8.409448818897638e-05,
650
- "loss": 0.6089,
651
  "step": 107
652
  },
653
  {
654
  "epoch": 3.38,
655
  "learning_rate": 8.393700787401575e-05,
656
- "loss": 0.3305,
657
  "step": 108
658
  },
659
  {
660
  "epoch": 3.41,
661
  "learning_rate": 8.377952755905513e-05,
662
- "loss": 0.5199,
663
  "step": 109
664
  },
665
  {
666
  "epoch": 3.44,
667
  "learning_rate": 8.36220472440945e-05,
668
- "loss": 0.4402,
669
  "step": 110
670
  },
671
  {
672
  "epoch": 3.47,
673
  "learning_rate": 8.346456692913386e-05,
674
- "loss": 0.4375,
675
  "step": 111
676
  },
677
  {
678
  "epoch": 3.5,
679
  "learning_rate": 8.330708661417324e-05,
680
- "loss": 0.54,
681
  "step": 112
682
  },
683
  {
684
  "epoch": 3.53,
685
  "learning_rate": 8.31496062992126e-05,
686
- "loss": 0.4239,
687
  "step": 113
688
  },
689
  {
690
  "epoch": 3.56,
691
  "learning_rate": 8.299212598425197e-05,
692
- "loss": 0.4964,
693
  "step": 114
694
  },
695
  {
696
  "epoch": 3.59,
697
  "learning_rate": 8.283464566929135e-05,
698
- "loss": 0.5613,
699
  "step": 115
700
  },
701
  {
702
  "epoch": 3.62,
703
  "learning_rate": 8.267716535433071e-05,
704
- "loss": 0.5382,
705
  "step": 116
706
  },
707
  {
708
  "epoch": 3.66,
709
  "learning_rate": 8.251968503937009e-05,
710
- "loss": 0.4906,
711
  "step": 117
712
  },
713
  {
714
  "epoch": 3.69,
715
  "learning_rate": 8.236220472440946e-05,
716
- "loss": 0.3637,
717
  "step": 118
718
  },
719
  {
720
  "epoch": 3.72,
721
  "learning_rate": 8.220472440944882e-05,
722
- "loss": 0.352,
723
  "step": 119
724
  },
725
  {
726
  "epoch": 3.75,
727
  "learning_rate": 8.20472440944882e-05,
728
- "loss": 0.2867,
729
  "step": 120
730
  },
731
  {
732
  "epoch": 3.78,
733
  "learning_rate": 8.188976377952757e-05,
734
- "loss": 0.4565,
735
  "step": 121
736
  },
737
  {
738
  "epoch": 3.81,
739
  "learning_rate": 8.173228346456693e-05,
740
- "loss": 0.4714,
741
  "step": 122
742
  },
743
  {
744
  "epoch": 3.84,
745
  "learning_rate": 8.15748031496063e-05,
746
- "loss": 0.4415,
747
  "step": 123
748
  },
749
  {
750
  "epoch": 3.88,
751
  "learning_rate": 8.141732283464568e-05,
752
- "loss": 0.5417,
753
  "step": 124
754
  },
755
  {
756
  "epoch": 3.91,
757
  "learning_rate": 8.125984251968504e-05,
758
- "loss": 0.4383,
759
  "step": 125
760
  },
761
  {
762
  "epoch": 3.94,
763
  "learning_rate": 8.110236220472441e-05,
764
- "loss": 0.5118,
765
  "step": 126
766
  },
767
  {
768
  "epoch": 3.97,
769
  "learning_rate": 8.094488188976377e-05,
770
- "loss": 0.3704,
771
  "step": 127
772
  },
773
  {
774
  "epoch": 4.0,
775
  "learning_rate": 8.078740157480315e-05,
776
- "loss": 0.4092,
777
  "step": 128
778
  },
779
  {
780
  "epoch": 4.03,
781
  "learning_rate": 8.062992125984252e-05,
782
- "loss": 0.2282,
783
  "step": 129
784
  },
785
  {
786
  "epoch": 4.06,
787
  "learning_rate": 8.047244094488188e-05,
788
- "loss": 0.2546,
789
  "step": 130
790
  },
791
  {
792
  "epoch": 4.09,
793
  "learning_rate": 8.031496062992126e-05,
794
- "loss": 0.2409,
795
  "step": 131
796
  },
797
  {
798
  "epoch": 4.12,
799
  "learning_rate": 8.015748031496063e-05,
800
- "loss": 0.1932,
801
  "step": 132
802
  },
803
  {
804
  "epoch": 4.16,
805
  "learning_rate": 8e-05,
806
- "loss": 0.2128,
807
  "step": 133
808
  },
809
  {
810
  "epoch": 4.19,
811
  "learning_rate": 7.984251968503937e-05,
812
- "loss": 0.2557,
813
  "step": 134
814
  },
815
  {
816
  "epoch": 4.22,
817
  "learning_rate": 7.968503937007874e-05,
818
- "loss": 0.2007,
819
  "step": 135
820
  },
821
  {
822
  "epoch": 4.25,
823
  "learning_rate": 7.952755905511812e-05,
824
- "loss": 0.256,
825
  "step": 136
826
  },
827
  {
828
  "epoch": 4.28,
829
  "learning_rate": 7.937007874015748e-05,
830
- "loss": 0.3295,
831
  "step": 137
832
  },
833
  {
834
  "epoch": 4.31,
835
  "learning_rate": 7.921259842519685e-05,
836
- "loss": 0.2158,
837
  "step": 138
838
  },
839
  {
840
  "epoch": 4.34,
841
  "learning_rate": 7.905511811023623e-05,
842
- "loss": 0.2334,
843
  "step": 139
844
  },
845
  {
846
  "epoch": 4.38,
847
  "learning_rate": 7.889763779527559e-05,
848
- "loss": 0.2821,
849
  "step": 140
850
  },
851
  {
852
  "epoch": 4.41,
853
  "learning_rate": 7.874015748031497e-05,
854
- "loss": 0.2101,
855
  "step": 141
856
  },
857
  {
858
  "epoch": 4.44,
859
  "learning_rate": 7.858267716535434e-05,
860
- "loss": 0.4001,
861
  "step": 142
862
  },
863
  {
864
  "epoch": 4.47,
865
  "learning_rate": 7.84251968503937e-05,
866
- "loss": 0.3537,
867
  "step": 143
868
  },
869
  {
870
  "epoch": 4.5,
871
  "learning_rate": 7.826771653543308e-05,
872
- "loss": 0.1983,
873
  "step": 144
874
  },
875
  {
876
  "epoch": 4.53,
877
  "learning_rate": 7.811023622047245e-05,
878
- "loss": 0.1781,
879
  "step": 145
880
  },
881
  {
882
  "epoch": 4.56,
883
  "learning_rate": 7.795275590551181e-05,
884
- "loss": 0.1608,
885
  "step": 146
886
  },
887
  {
888
  "epoch": 4.59,
889
  "learning_rate": 7.779527559055119e-05,
890
- "loss": 0.2135,
891
  "step": 147
892
  },
893
  {
894
  "epoch": 4.62,
895
  "learning_rate": 7.763779527559056e-05,
896
- "loss": 0.2055,
897
  "step": 148
898
  },
899
  {
900
  "epoch": 4.66,
901
  "learning_rate": 7.748031496062993e-05,
902
- "loss": 0.2123,
903
  "step": 149
904
  },
905
  {
906
  "epoch": 4.69,
907
  "learning_rate": 7.73228346456693e-05,
908
- "loss": 0.2106,
909
  "step": 150
910
  },
911
  {
912
  "epoch": 4.72,
913
  "learning_rate": 7.716535433070867e-05,
914
- "loss": 0.1942,
915
  "step": 151
916
  },
917
  {
918
  "epoch": 4.75,
919
  "learning_rate": 7.700787401574804e-05,
920
- "loss": 0.1681,
921
  "step": 152
922
  },
923
  {
924
  "epoch": 4.78,
925
  "learning_rate": 7.685039370078741e-05,
926
- "loss": 0.2417,
927
  "step": 153
928
  },
929
  {
930
  "epoch": 4.81,
931
  "learning_rate": 7.669291338582677e-05,
932
- "loss": 0.2606,
933
  "step": 154
934
  },
935
  {
936
  "epoch": 4.84,
937
  "learning_rate": 7.653543307086615e-05,
938
- "loss": 0.2425,
939
  "step": 155
940
  },
941
  {
942
  "epoch": 4.88,
943
  "learning_rate": 7.637795275590552e-05,
944
- "loss": 0.1729,
945
  "step": 156
946
  },
947
  {
948
  "epoch": 4.91,
949
  "learning_rate": 7.622047244094488e-05,
950
- "loss": 0.2462,
951
  "step": 157
952
  },
953
  {
954
  "epoch": 4.94,
955
  "learning_rate": 7.606299212598425e-05,
956
- "loss": 0.2655,
957
  "step": 158
958
  },
959
  {
960
  "epoch": 4.97,
961
  "learning_rate": 7.590551181102362e-05,
962
- "loss": 0.1604,
963
  "step": 159
964
  },
965
  {
966
  "epoch": 5.0,
967
  "learning_rate": 7.5748031496063e-05,
968
- "loss": 0.2374,
969
  "step": 160
970
  },
971
  {
972
  "epoch": 5.03,
973
  "learning_rate": 7.559055118110236e-05,
974
- "loss": 0.1123,
975
  "step": 161
976
  },
977
  {
978
  "epoch": 5.06,
979
  "learning_rate": 7.543307086614173e-05,
980
- "loss": 0.1252,
981
  "step": 162
982
  },
983
  {
984
  "epoch": 5.09,
985
  "learning_rate": 7.52755905511811e-05,
986
- "loss": 0.1216,
987
  "step": 163
988
  },
989
  {
990
  "epoch": 5.12,
991
  "learning_rate": 7.511811023622047e-05,
992
- "loss": 0.1319,
993
  "step": 164
994
  },
995
  {
996
  "epoch": 5.16,
997
  "learning_rate": 7.496062992125985e-05,
998
- "loss": 0.1857,
999
  "step": 165
1000
  },
1001
  {
1002
  "epoch": 5.19,
1003
  "learning_rate": 7.480314960629921e-05,
1004
- "loss": 0.1783,
1005
  "step": 166
1006
  },
1007
  {
1008
  "epoch": 5.22,
1009
  "learning_rate": 7.464566929133858e-05,
1010
- "loss": 0.15,
1011
  "step": 167
1012
  },
1013
  {
1014
  "epoch": 5.25,
1015
  "learning_rate": 7.448818897637796e-05,
1016
- "loss": 0.1676,
1017
  "step": 168
1018
  },
1019
  {
1020
  "epoch": 5.28,
1021
  "learning_rate": 7.433070866141732e-05,
1022
- "loss": 0.1234,
1023
  "step": 169
1024
  },
1025
  {
1026
  "epoch": 5.31,
1027
  "learning_rate": 7.417322834645669e-05,
1028
- "loss": 0.1457,
1029
  "step": 170
1030
  },
1031
  {
1032
  "epoch": 5.34,
1033
  "learning_rate": 7.401574803149607e-05,
1034
- "loss": 0.1288,
1035
  "step": 171
1036
  },
1037
  {
1038
  "epoch": 5.38,
1039
  "learning_rate": 7.385826771653543e-05,
1040
- "loss": 0.2026,
1041
  "step": 172
1042
  },
1043
  {
1044
  "epoch": 5.41,
1045
  "learning_rate": 7.370078740157481e-05,
1046
- "loss": 0.1965,
1047
  "step": 173
1048
  },
1049
  {
1050
  "epoch": 5.44,
1051
  "learning_rate": 7.354330708661418e-05,
1052
- "loss": 0.145,
1053
  "step": 174
1054
  },
1055
  {
1056
  "epoch": 5.47,
1057
  "learning_rate": 7.338582677165354e-05,
1058
- "loss": 0.1021,
1059
  "step": 175
1060
  },
1061
  {
1062
  "epoch": 5.5,
1063
  "learning_rate": 7.322834645669292e-05,
1064
- "loss": 0.2084,
1065
  "step": 176
1066
  },
1067
  {
1068
  "epoch": 5.53,
1069
  "learning_rate": 7.307086614173229e-05,
1070
- "loss": 0.133,
1071
  "step": 177
1072
  },
1073
  {
1074
  "epoch": 5.56,
1075
  "learning_rate": 7.291338582677165e-05,
1076
- "loss": 0.2054,
1077
  "step": 178
1078
  },
1079
  {
1080
  "epoch": 5.59,
1081
  "learning_rate": 7.275590551181103e-05,
1082
- "loss": 0.1512,
1083
  "step": 179
1084
  },
1085
  {
1086
  "epoch": 5.62,
1087
  "learning_rate": 7.25984251968504e-05,
1088
- "loss": 0.1231,
1089
  "step": 180
1090
  },
1091
  {
1092
  "epoch": 5.66,
1093
  "learning_rate": 7.244094488188978e-05,
1094
- "loss": 0.1453,
1095
  "step": 181
1096
  },
1097
  {
1098
  "epoch": 5.69,
1099
  "learning_rate": 7.228346456692914e-05,
1100
- "loss": 0.1277,
1101
  "step": 182
1102
  },
1103
  {
1104
  "epoch": 5.72,
1105
  "learning_rate": 7.212598425196851e-05,
1106
- "loss": 0.164,
1107
  "step": 183
1108
  },
1109
  {
1110
  "epoch": 5.75,
1111
  "learning_rate": 7.196850393700789e-05,
1112
- "loss": 0.1533,
1113
  "step": 184
1114
  },
1115
  {
1116
  "epoch": 5.78,
1117
  "learning_rate": 7.181102362204725e-05,
1118
- "loss": 0.2112,
1119
  "step": 185
1120
  },
1121
  {
1122
  "epoch": 5.81,
1123
  "learning_rate": 7.165354330708662e-05,
1124
- "loss": 0.1632,
1125
  "step": 186
1126
  },
1127
  {
1128
  "epoch": 5.84,
1129
  "learning_rate": 7.1496062992126e-05,
1130
- "loss": 0.1842,
1131
  "step": 187
1132
  },
1133
  {
1134
  "epoch": 5.88,
1135
  "learning_rate": 7.133858267716536e-05,
1136
- "loss": 0.1738,
1137
  "step": 188
1138
  },
1139
  {
1140
  "epoch": 5.91,
1141
  "learning_rate": 7.118110236220473e-05,
1142
- "loss": 0.1458,
1143
  "step": 189
1144
  },
1145
  {
1146
  "epoch": 5.94,
1147
  "learning_rate": 7.102362204724409e-05,
1148
- "loss": 0.2163,
1149
  "step": 190
1150
  },
1151
  {
1152
  "epoch": 5.97,
1153
  "learning_rate": 7.086614173228347e-05,
1154
- "loss": 0.2372,
1155
  "step": 191
1156
  },
1157
  {
1158
  "epoch": 6.0,
1159
  "learning_rate": 7.070866141732284e-05,
1160
- "loss": 0.1905,
1161
  "step": 192
1162
  },
1163
  {
1164
  "epoch": 6.03,
1165
  "learning_rate": 7.05511811023622e-05,
1166
- "loss": 0.0759,
1167
  "step": 193
1168
  },
1169
  {
1170
  "epoch": 6.06,
1171
  "learning_rate": 7.039370078740157e-05,
1172
- "loss": 0.101,
1173
  "step": 194
1174
  },
1175
  {
1176
  "epoch": 6.09,
1177
  "learning_rate": 7.023622047244095e-05,
1178
- "loss": 0.0936,
1179
  "step": 195
1180
  },
1181
  {
1182
  "epoch": 6.12,
1183
  "learning_rate": 7.007874015748031e-05,
1184
- "loss": 0.0911,
1185
  "step": 196
1186
  },
1187
  {
1188
  "epoch": 6.16,
1189
  "learning_rate": 6.992125984251969e-05,
1190
- "loss": 0.1201,
1191
  "step": 197
1192
  },
1193
  {
1194
  "epoch": 6.19,
1195
  "learning_rate": 6.976377952755906e-05,
1196
- "loss": 0.1026,
1197
  "step": 198
1198
  },
1199
  {
1200
  "epoch": 6.22,
1201
  "learning_rate": 6.960629921259842e-05,
1202
- "loss": 0.1676,
1203
  "step": 199
1204
  },
1205
  {
1206
  "epoch": 6.25,
1207
  "learning_rate": 6.94488188976378e-05,
1208
- "loss": 0.0755,
1209
  "step": 200
1210
  },
1211
  {
1212
  "epoch": 6.28,
1213
  "learning_rate": 6.929133858267717e-05,
1214
- "loss": 0.1642,
1215
  "step": 201
1216
  },
1217
  {
1218
  "epoch": 6.31,
1219
  "learning_rate": 6.913385826771653e-05,
1220
- "loss": 0.0813,
1221
  "step": 202
1222
  },
1223
  {
1224
  "epoch": 6.34,
1225
  "learning_rate": 6.897637795275591e-05,
1226
- "loss": 0.1094,
1227
  "step": 203
1228
  },
1229
  {
1230
  "epoch": 6.38,
1231
  "learning_rate": 6.881889763779528e-05,
1232
- "loss": 0.1899,
1233
  "step": 204
1234
  },
1235
  {
1236
  "epoch": 6.41,
1237
  "learning_rate": 6.866141732283465e-05,
1238
- "loss": 0.126,
1239
  "step": 205
1240
  },
1241
  {
1242
  "epoch": 6.44,
1243
  "learning_rate": 6.850393700787402e-05,
1244
- "loss": 0.1822,
1245
  "step": 206
1246
  },
1247
  {
1248
  "epoch": 6.47,
1249
  "learning_rate": 6.834645669291338e-05,
1250
- "loss": 0.1147,
1251
  "step": 207
1252
  },
1253
  {
@@ -1259,2610 +1259,2610 @@
1259
  {
1260
  "epoch": 6.53,
1261
  "learning_rate": 6.803149606299213e-05,
1262
- "loss": 0.1423,
1263
  "step": 209
1264
  },
1265
  {
1266
  "epoch": 6.56,
1267
  "learning_rate": 6.78740157480315e-05,
1268
- "loss": 0.1508,
1269
  "step": 210
1270
  },
1271
  {
1272
  "epoch": 6.59,
1273
  "learning_rate": 6.771653543307087e-05,
1274
- "loss": 0.1236,
1275
  "step": 211
1276
  },
1277
  {
1278
  "epoch": 6.62,
1279
  "learning_rate": 6.755905511811024e-05,
1280
- "loss": 0.1291,
1281
  "step": 212
1282
  },
1283
  {
1284
  "epoch": 6.66,
1285
  "learning_rate": 6.740157480314962e-05,
1286
- "loss": 0.1077,
1287
  "step": 213
1288
  },
1289
  {
1290
  "epoch": 6.69,
1291
  "learning_rate": 6.724409448818898e-05,
1292
- "loss": 0.1484,
1293
  "step": 214
1294
  },
1295
  {
1296
  "epoch": 6.72,
1297
  "learning_rate": 6.708661417322835e-05,
1298
- "loss": 0.1311,
1299
  "step": 215
1300
  },
1301
  {
1302
  "epoch": 6.75,
1303
  "learning_rate": 6.692913385826773e-05,
1304
- "loss": 0.1,
1305
  "step": 216
1306
  },
1307
  {
1308
  "epoch": 6.78,
1309
  "learning_rate": 6.677165354330709e-05,
1310
- "loss": 0.1236,
1311
  "step": 217
1312
  },
1313
  {
1314
  "epoch": 6.81,
1315
  "learning_rate": 6.661417322834646e-05,
1316
- "loss": 0.091,
1317
  "step": 218
1318
  },
1319
  {
1320
  "epoch": 6.84,
1321
  "learning_rate": 6.645669291338584e-05,
1322
- "loss": 0.1268,
1323
  "step": 219
1324
  },
1325
  {
1326
  "epoch": 6.88,
1327
  "learning_rate": 6.62992125984252e-05,
1328
- "loss": 0.1366,
1329
  "step": 220
1330
  },
1331
  {
1332
  "epoch": 6.91,
1333
  "learning_rate": 6.614173228346457e-05,
1334
- "loss": 0.1511,
1335
  "step": 221
1336
  },
1337
  {
1338
  "epoch": 6.94,
1339
  "learning_rate": 6.598425196850395e-05,
1340
- "loss": 0.1188,
1341
  "step": 222
1342
  },
1343
  {
1344
  "epoch": 6.97,
1345
  "learning_rate": 6.582677165354331e-05,
1346
- "loss": 0.1433,
1347
  "step": 223
1348
  },
1349
  {
1350
  "epoch": 7.0,
1351
  "learning_rate": 6.566929133858268e-05,
1352
- "loss": 0.1124,
1353
  "step": 224
1354
  },
1355
  {
1356
  "epoch": 7.03,
1357
  "learning_rate": 6.551181102362204e-05,
1358
- "loss": 0.121,
1359
  "step": 225
1360
  },
1361
  {
1362
  "epoch": 7.06,
1363
  "learning_rate": 6.535433070866141e-05,
1364
- "loss": 0.0712,
1365
  "step": 226
1366
  },
1367
  {
1368
  "epoch": 7.09,
1369
  "learning_rate": 6.519685039370079e-05,
1370
- "loss": 0.0902,
1371
  "step": 227
1372
  },
1373
  {
1374
  "epoch": 7.12,
1375
  "learning_rate": 6.503937007874015e-05,
1376
- "loss": 0.1052,
1377
  "step": 228
1378
  },
1379
  {
1380
  "epoch": 7.16,
1381
  "learning_rate": 6.488188976377953e-05,
1382
- "loss": 0.086,
1383
  "step": 229
1384
  },
1385
  {
1386
  "epoch": 7.19,
1387
  "learning_rate": 6.47244094488189e-05,
1388
- "loss": 0.0905,
1389
  "step": 230
1390
  },
1391
  {
1392
  "epoch": 7.22,
1393
  "learning_rate": 6.456692913385826e-05,
1394
- "loss": 0.0705,
1395
  "step": 231
1396
  },
1397
  {
1398
  "epoch": 7.25,
1399
  "learning_rate": 6.440944881889764e-05,
1400
- "loss": 0.1301,
1401
  "step": 232
1402
  },
1403
  {
1404
  "epoch": 7.28,
1405
  "learning_rate": 6.425196850393701e-05,
1406
- "loss": 0.1262,
1407
  "step": 233
1408
  },
1409
  {
1410
  "epoch": 7.31,
1411
  "learning_rate": 6.409448818897637e-05,
1412
- "loss": 0.0997,
1413
  "step": 234
1414
  },
1415
  {
1416
  "epoch": 7.34,
1417
  "learning_rate": 6.393700787401575e-05,
1418
- "loss": 0.1171,
1419
  "step": 235
1420
  },
1421
  {
1422
  "epoch": 7.38,
1423
  "learning_rate": 6.377952755905512e-05,
1424
- "loss": 0.069,
1425
  "step": 236
1426
  },
1427
  {
1428
  "epoch": 7.41,
1429
  "learning_rate": 6.36220472440945e-05,
1430
- "loss": 0.089,
1431
  "step": 237
1432
  },
1433
  {
1434
  "epoch": 7.44,
1435
  "learning_rate": 6.346456692913386e-05,
1436
- "loss": 0.1417,
1437
  "step": 238
1438
  },
1439
  {
1440
  "epoch": 7.47,
1441
  "learning_rate": 6.330708661417323e-05,
1442
- "loss": 0.1289,
1443
  "step": 239
1444
  },
1445
  {
1446
  "epoch": 7.5,
1447
  "learning_rate": 6.31496062992126e-05,
1448
- "loss": 0.0897,
1449
  "step": 240
1450
  },
1451
  {
1452
  "epoch": 7.53,
1453
  "learning_rate": 6.299212598425197e-05,
1454
- "loss": 0.0718,
1455
  "step": 241
1456
  },
1457
  {
1458
  "epoch": 7.56,
1459
  "learning_rate": 6.283464566929134e-05,
1460
- "loss": 0.1893,
1461
  "step": 242
1462
  },
1463
  {
1464
  "epoch": 7.59,
1465
  "learning_rate": 6.267716535433072e-05,
1466
- "loss": 0.0971,
1467
  "step": 243
1468
  },
1469
  {
1470
  "epoch": 7.62,
1471
  "learning_rate": 6.251968503937008e-05,
1472
- "loss": 0.1143,
1473
  "step": 244
1474
  },
1475
  {
1476
  "epoch": 7.66,
1477
  "learning_rate": 6.236220472440946e-05,
1478
- "loss": 0.0718,
1479
  "step": 245
1480
  },
1481
  {
1482
  "epoch": 7.69,
1483
  "learning_rate": 6.220472440944882e-05,
1484
- "loss": 0.0847,
1485
  "step": 246
1486
  },
1487
  {
1488
  "epoch": 7.72,
1489
  "learning_rate": 6.204724409448819e-05,
1490
- "loss": 0.1153,
1491
  "step": 247
1492
  },
1493
  {
1494
  "epoch": 7.75,
1495
  "learning_rate": 6.188976377952757e-05,
1496
- "loss": 0.0645,
1497
  "step": 248
1498
  },
1499
  {
1500
  "epoch": 7.78,
1501
  "learning_rate": 6.173228346456693e-05,
1502
- "loss": 0.1279,
1503
  "step": 249
1504
  },
1505
  {
1506
  "epoch": 7.81,
1507
  "learning_rate": 6.15748031496063e-05,
1508
- "loss": 0.1142,
1509
  "step": 250
1510
  },
1511
  {
1512
  "epoch": 7.84,
1513
  "learning_rate": 6.141732283464568e-05,
1514
- "loss": 0.1066,
1515
  "step": 251
1516
  },
1517
  {
1518
  "epoch": 7.88,
1519
  "learning_rate": 6.125984251968504e-05,
1520
- "loss": 0.1069,
1521
  "step": 252
1522
  },
1523
  {
1524
  "epoch": 7.91,
1525
  "learning_rate": 6.110236220472442e-05,
1526
- "loss": 0.0769,
1527
  "step": 253
1528
  },
1529
  {
1530
  "epoch": 7.94,
1531
  "learning_rate": 6.094488188976378e-05,
1532
- "loss": 0.1081,
1533
  "step": 254
1534
  },
1535
  {
1536
  "epoch": 7.97,
1537
  "learning_rate": 6.078740157480315e-05,
1538
- "loss": 0.1001,
1539
  "step": 255
1540
  },
1541
  {
1542
  "epoch": 8.0,
1543
  "learning_rate": 6.0629921259842526e-05,
1544
- "loss": 0.1873,
1545
  "step": 256
1546
  },
1547
  {
1548
  "epoch": 8.03,
1549
  "learning_rate": 6.047244094488189e-05,
1550
- "loss": 0.064,
1551
  "step": 257
1552
  },
1553
  {
1554
  "epoch": 8.06,
1555
  "learning_rate": 6.031496062992126e-05,
1556
- "loss": 0.0706,
1557
  "step": 258
1558
  },
1559
  {
1560
  "epoch": 8.09,
1561
  "learning_rate": 6.0157480314960636e-05,
1562
- "loss": 0.0778,
1563
  "step": 259
1564
  },
1565
  {
1566
  "epoch": 8.12,
1567
  "learning_rate": 6e-05,
1568
- "loss": 0.0954,
1569
  "step": 260
1570
  },
1571
  {
1572
  "epoch": 8.16,
1573
  "learning_rate": 5.984251968503938e-05,
1574
- "loss": 0.0639,
1575
  "step": 261
1576
  },
1577
  {
1578
  "epoch": 8.19,
1579
  "learning_rate": 5.9685039370078746e-05,
1580
- "loss": 0.0911,
1581
  "step": 262
1582
  },
1583
  {
1584
  "epoch": 8.22,
1585
  "learning_rate": 5.952755905511811e-05,
1586
- "loss": 0.0805,
1587
  "step": 263
1588
  },
1589
  {
1590
  "epoch": 8.25,
1591
  "learning_rate": 5.9370078740157483e-05,
1592
- "loss": 0.1163,
1593
  "step": 264
1594
  },
1595
  {
1596
  "epoch": 8.28,
1597
  "learning_rate": 5.9212598425196856e-05,
1598
- "loss": 0.0768,
1599
  "step": 265
1600
  },
1601
  {
1602
  "epoch": 8.31,
1603
  "learning_rate": 5.905511811023622e-05,
1604
- "loss": 0.0607,
1605
  "step": 266
1606
  },
1607
  {
1608
  "epoch": 8.34,
1609
  "learning_rate": 5.889763779527559e-05,
1610
- "loss": 0.0577,
1611
  "step": 267
1612
  },
1613
  {
1614
  "epoch": 8.38,
1615
  "learning_rate": 5.874015748031496e-05,
1616
- "loss": 0.0714,
1617
  "step": 268
1618
  },
1619
  {
1620
  "epoch": 8.41,
1621
  "learning_rate": 5.858267716535434e-05,
1622
- "loss": 0.0782,
1623
  "step": 269
1624
  },
1625
  {
1626
  "epoch": 8.44,
1627
  "learning_rate": 5.84251968503937e-05,
1628
- "loss": 0.0561,
1629
  "step": 270
1630
  },
1631
  {
1632
  "epoch": 8.47,
1633
  "learning_rate": 5.826771653543307e-05,
1634
- "loss": 0.0916,
1635
  "step": 271
1636
  },
1637
  {
1638
  "epoch": 8.5,
1639
  "learning_rate": 5.811023622047245e-05,
1640
- "loss": 0.0822,
1641
  "step": 272
1642
  },
1643
  {
1644
  "epoch": 8.53,
1645
  "learning_rate": 5.795275590551181e-05,
1646
- "loss": 0.11,
1647
  "step": 273
1648
  },
1649
  {
1650
  "epoch": 8.56,
1651
  "learning_rate": 5.779527559055118e-05,
1652
- "loss": 0.1089,
1653
  "step": 274
1654
  },
1655
  {
1656
  "epoch": 8.59,
1657
  "learning_rate": 5.763779527559056e-05,
1658
- "loss": 0.0626,
1659
  "step": 275
1660
  },
1661
  {
1662
  "epoch": 8.62,
1663
  "learning_rate": 5.748031496062992e-05,
1664
- "loss": 0.0654,
1665
  "step": 276
1666
  },
1667
  {
1668
  "epoch": 8.66,
1669
  "learning_rate": 5.73228346456693e-05,
1670
- "loss": 0.1171,
1671
  "step": 277
1672
  },
1673
  {
1674
  "epoch": 8.69,
1675
  "learning_rate": 5.716535433070867e-05,
1676
- "loss": 0.0866,
1677
  "step": 278
1678
  },
1679
  {
1680
  "epoch": 8.72,
1681
  "learning_rate": 5.700787401574803e-05,
1682
- "loss": 0.0735,
1683
  "step": 279
1684
  },
1685
  {
1686
  "epoch": 8.75,
1687
  "learning_rate": 5.6850393700787404e-05,
1688
- "loss": 0.1289,
1689
  "step": 280
1690
  },
1691
  {
1692
  "epoch": 8.78,
1693
  "learning_rate": 5.6692913385826777e-05,
1694
- "loss": 0.1031,
1695
  "step": 281
1696
  },
1697
  {
1698
  "epoch": 8.81,
1699
  "learning_rate": 5.653543307086614e-05,
1700
- "loss": 0.1215,
1701
  "step": 282
1702
  },
1703
  {
1704
  "epoch": 8.84,
1705
  "learning_rate": 5.6377952755905514e-05,
1706
- "loss": 0.1482,
1707
  "step": 283
1708
  },
1709
  {
1710
  "epoch": 8.88,
1711
  "learning_rate": 5.622047244094488e-05,
1712
- "loss": 0.1245,
1713
  "step": 284
1714
  },
1715
  {
1716
  "epoch": 8.91,
1717
  "learning_rate": 5.606299212598426e-05,
1718
- "loss": 0.0637,
1719
  "step": 285
1720
  },
1721
  {
1722
  "epoch": 8.94,
1723
  "learning_rate": 5.5905511811023624e-05,
1724
- "loss": 0.0751,
1725
  "step": 286
1726
  },
1727
  {
1728
  "epoch": 8.97,
1729
  "learning_rate": 5.574803149606299e-05,
1730
- "loss": 0.0757,
1731
  "step": 287
1732
  },
1733
  {
1734
  "epoch": 9.0,
1735
  "learning_rate": 5.559055118110237e-05,
1736
- "loss": 0.0941,
1737
  "step": 288
1738
  },
1739
  {
1740
  "epoch": 9.03,
1741
  "learning_rate": 5.5433070866141734e-05,
1742
- "loss": 0.1105,
1743
  "step": 289
1744
  },
1745
  {
1746
  "epoch": 9.06,
1747
  "learning_rate": 5.52755905511811e-05,
1748
- "loss": 0.0519,
1749
  "step": 290
1750
  },
1751
  {
1752
  "epoch": 9.09,
1753
  "learning_rate": 5.511811023622048e-05,
1754
- "loss": 0.0859,
1755
  "step": 291
1756
  },
1757
  {
1758
  "epoch": 9.12,
1759
  "learning_rate": 5.496062992125984e-05,
1760
- "loss": 0.0568,
1761
  "step": 292
1762
  },
1763
  {
1764
  "epoch": 9.16,
1765
  "learning_rate": 5.480314960629922e-05,
1766
- "loss": 0.1288,
1767
  "step": 293
1768
  },
1769
  {
1770
  "epoch": 9.19,
1771
  "learning_rate": 5.464566929133859e-05,
1772
- "loss": 0.083,
1773
  "step": 294
1774
  },
1775
  {
1776
  "epoch": 9.22,
1777
  "learning_rate": 5.448818897637795e-05,
1778
- "loss": 0.1869,
1779
  "step": 295
1780
  },
1781
  {
1782
  "epoch": 9.25,
1783
  "learning_rate": 5.433070866141733e-05,
1784
- "loss": 0.0604,
1785
  "step": 296
1786
  },
1787
  {
1788
  "epoch": 9.28,
1789
  "learning_rate": 5.41732283464567e-05,
1790
- "loss": 0.073,
1791
  "step": 297
1792
  },
1793
  {
1794
  "epoch": 9.31,
1795
  "learning_rate": 5.401574803149606e-05,
1796
- "loss": 0.0853,
1797
  "step": 298
1798
  },
1799
  {
1800
  "epoch": 9.34,
1801
  "learning_rate": 5.3858267716535435e-05,
1802
- "loss": 0.1008,
1803
  "step": 299
1804
  },
1805
  {
1806
  "epoch": 9.38,
1807
  "learning_rate": 5.37007874015748e-05,
1808
- "loss": 0.0557,
1809
  "step": 300
1810
  },
1811
  {
1812
  "epoch": 9.41,
1813
  "learning_rate": 5.354330708661418e-05,
1814
- "loss": 0.062,
1815
  "step": 301
1816
  },
1817
  {
1818
  "epoch": 9.44,
1819
  "learning_rate": 5.3385826771653545e-05,
1820
- "loss": 0.081,
1821
  "step": 302
1822
  },
1823
  {
1824
  "epoch": 9.47,
1825
  "learning_rate": 5.322834645669291e-05,
1826
- "loss": 0.0598,
1827
  "step": 303
1828
  },
1829
  {
1830
  "epoch": 9.5,
1831
  "learning_rate": 5.307086614173229e-05,
1832
- "loss": 0.0602,
1833
  "step": 304
1834
  },
1835
  {
1836
  "epoch": 9.53,
1837
  "learning_rate": 5.2913385826771654e-05,
1838
- "loss": 0.0549,
1839
  "step": 305
1840
  },
1841
  {
1842
  "epoch": 9.56,
1843
  "learning_rate": 5.275590551181102e-05,
1844
- "loss": 0.0533,
1845
  "step": 306
1846
  },
1847
  {
1848
  "epoch": 9.59,
1849
  "learning_rate": 5.25984251968504e-05,
1850
- "loss": 0.0817,
1851
  "step": 307
1852
  },
1853
  {
1854
  "epoch": 9.62,
1855
  "learning_rate": 5.2440944881889764e-05,
1856
- "loss": 0.0674,
1857
  "step": 308
1858
  },
1859
  {
1860
  "epoch": 9.66,
1861
  "learning_rate": 5.228346456692914e-05,
1862
- "loss": 0.1059,
1863
  "step": 309
1864
  },
1865
  {
1866
  "epoch": 9.69,
1867
  "learning_rate": 5.212598425196851e-05,
1868
- "loss": 0.0883,
1869
  "step": 310
1870
  },
1871
  {
1872
  "epoch": 9.72,
1873
  "learning_rate": 5.1968503937007874e-05,
1874
- "loss": 0.1352,
1875
  "step": 311
1876
  },
1877
  {
1878
  "epoch": 9.75,
1879
  "learning_rate": 5.181102362204725e-05,
1880
- "loss": 0.0634,
1881
  "step": 312
1882
  },
1883
  {
1884
  "epoch": 9.78,
1885
  "learning_rate": 5.165354330708662e-05,
1886
- "loss": 0.0599,
1887
  "step": 313
1888
  },
1889
  {
1890
  "epoch": 9.81,
1891
  "learning_rate": 5.1496062992125984e-05,
1892
- "loss": 0.0545,
1893
  "step": 314
1894
  },
1895
  {
1896
  "epoch": 9.84,
1897
  "learning_rate": 5.1338582677165356e-05,
1898
- "loss": 0.1062,
1899
  "step": 315
1900
  },
1901
  {
1902
  "epoch": 9.88,
1903
  "learning_rate": 5.118110236220473e-05,
1904
- "loss": 0.0659,
1905
  "step": 316
1906
  },
1907
  {
1908
  "epoch": 9.91,
1909
  "learning_rate": 5.10236220472441e-05,
1910
- "loss": 0.0969,
1911
  "step": 317
1912
  },
1913
  {
1914
  "epoch": 9.94,
1915
  "learning_rate": 5.0866141732283466e-05,
1916
- "loss": 0.0963,
1917
  "step": 318
1918
  },
1919
  {
1920
  "epoch": 9.97,
1921
  "learning_rate": 5.070866141732283e-05,
1922
- "loss": 0.0849,
1923
  "step": 319
1924
  },
1925
  {
1926
  "epoch": 10.0,
1927
  "learning_rate": 5.055118110236221e-05,
1928
- "loss": 0.0895,
1929
  "step": 320
1930
  },
1931
  {
1932
  "epoch": 10.03,
1933
  "learning_rate": 5.0393700787401575e-05,
1934
- "loss": 0.0552,
1935
  "step": 321
1936
  },
1937
  {
1938
  "epoch": 10.06,
1939
  "learning_rate": 5.023622047244094e-05,
1940
- "loss": 0.0649,
1941
  "step": 322
1942
  },
1943
  {
1944
  "epoch": 10.09,
1945
  "learning_rate": 5.007874015748032e-05,
1946
- "loss": 0.052,
1947
  "step": 323
1948
  },
1949
  {
1950
  "epoch": 10.12,
1951
  "learning_rate": 4.9921259842519685e-05,
1952
- "loss": 0.0748,
1953
  "step": 324
1954
  },
1955
  {
1956
  "epoch": 10.16,
1957
  "learning_rate": 4.976377952755906e-05,
1958
- "loss": 0.1191,
1959
  "step": 325
1960
  },
1961
  {
1962
  "epoch": 10.19,
1963
  "learning_rate": 4.960629921259843e-05,
1964
- "loss": 0.0926,
1965
  "step": 326
1966
  },
1967
  {
1968
  "epoch": 10.22,
1969
  "learning_rate": 4.94488188976378e-05,
1970
- "loss": 0.0515,
1971
  "step": 327
1972
  },
1973
  {
1974
  "epoch": 10.25,
1975
  "learning_rate": 4.929133858267717e-05,
1976
- "loss": 0.0744,
1977
  "step": 328
1978
  },
1979
  {
1980
  "epoch": 10.28,
1981
  "learning_rate": 4.913385826771654e-05,
1982
- "loss": 0.0644,
1983
  "step": 329
1984
  },
1985
  {
1986
  "epoch": 10.31,
1987
  "learning_rate": 4.897637795275591e-05,
1988
- "loss": 0.0442,
1989
  "step": 330
1990
  },
1991
  {
1992
  "epoch": 10.34,
1993
  "learning_rate": 4.881889763779528e-05,
1994
- "loss": 0.0665,
1995
  "step": 331
1996
  },
1997
  {
1998
  "epoch": 10.38,
1999
  "learning_rate": 4.866141732283465e-05,
2000
- "loss": 0.0647,
2001
  "step": 332
2002
  },
2003
  {
2004
  "epoch": 10.41,
2005
  "learning_rate": 4.8503937007874014e-05,
2006
- "loss": 0.0419,
2007
  "step": 333
2008
  },
2009
  {
2010
  "epoch": 10.44,
2011
  "learning_rate": 4.8346456692913387e-05,
2012
- "loss": 0.0742,
2013
  "step": 334
2014
  },
2015
  {
2016
  "epoch": 10.47,
2017
  "learning_rate": 4.818897637795276e-05,
2018
- "loss": 0.0668,
2019
  "step": 335
2020
  },
2021
  {
2022
  "epoch": 10.5,
2023
  "learning_rate": 4.8031496062992124e-05,
2024
- "loss": 0.0583,
2025
  "step": 336
2026
  },
2027
  {
2028
  "epoch": 10.53,
2029
  "learning_rate": 4.7874015748031496e-05,
2030
- "loss": 0.0599,
2031
  "step": 337
2032
  },
2033
  {
2034
  "epoch": 10.56,
2035
  "learning_rate": 4.771653543307087e-05,
2036
- "loss": 0.0678,
2037
  "step": 338
2038
  },
2039
  {
2040
  "epoch": 10.59,
2041
  "learning_rate": 4.755905511811024e-05,
2042
- "loss": 0.056,
2043
  "step": 339
2044
  },
2045
  {
2046
  "epoch": 10.62,
2047
  "learning_rate": 4.7401574803149606e-05,
2048
- "loss": 0.0767,
2049
  "step": 340
2050
  },
2051
  {
2052
  "epoch": 10.66,
2053
  "learning_rate": 4.724409448818898e-05,
2054
- "loss": 0.0777,
2055
  "step": 341
2056
  },
2057
  {
2058
  "epoch": 10.69,
2059
  "learning_rate": 4.708661417322835e-05,
2060
- "loss": 0.1102,
2061
  "step": 342
2062
  },
2063
  {
2064
  "epoch": 10.72,
2065
  "learning_rate": 4.692913385826772e-05,
2066
- "loss": 0.0733,
2067
  "step": 343
2068
  },
2069
  {
2070
  "epoch": 10.75,
2071
  "learning_rate": 4.677165354330709e-05,
2072
- "loss": 0.0759,
2073
  "step": 344
2074
  },
2075
  {
2076
  "epoch": 10.78,
2077
  "learning_rate": 4.661417322834646e-05,
2078
- "loss": 0.0686,
2079
  "step": 345
2080
  },
2081
  {
2082
  "epoch": 10.81,
2083
  "learning_rate": 4.645669291338583e-05,
2084
- "loss": 0.0551,
2085
  "step": 346
2086
  },
2087
  {
2088
  "epoch": 10.84,
2089
  "learning_rate": 4.6299212598425204e-05,
2090
- "loss": 0.0944,
2091
  "step": 347
2092
  },
2093
  {
2094
  "epoch": 10.88,
2095
  "learning_rate": 4.614173228346457e-05,
2096
- "loss": 0.1065,
2097
  "step": 348
2098
  },
2099
  {
2100
  "epoch": 10.91,
2101
  "learning_rate": 4.5984251968503935e-05,
2102
- "loss": 0.1034,
2103
  "step": 349
2104
  },
2105
  {
2106
  "epoch": 10.94,
2107
  "learning_rate": 4.582677165354331e-05,
2108
- "loss": 0.0898,
2109
  "step": 350
2110
  },
2111
  {
2112
  "epoch": 10.97,
2113
  "learning_rate": 4.566929133858268e-05,
2114
- "loss": 0.0681,
2115
  "step": 351
2116
  },
2117
  {
2118
  "epoch": 11.0,
2119
  "learning_rate": 4.5511811023622045e-05,
2120
- "loss": 0.1005,
2121
  "step": 352
2122
  },
2123
  {
2124
  "epoch": 11.03,
2125
  "learning_rate": 4.535433070866142e-05,
2126
- "loss": 0.058,
2127
  "step": 353
2128
  },
2129
  {
2130
  "epoch": 11.06,
2131
  "learning_rate": 4.519685039370079e-05,
2132
- "loss": 0.0642,
2133
  "step": 354
2134
  },
2135
  {
2136
  "epoch": 11.09,
2137
  "learning_rate": 4.503937007874016e-05,
2138
- "loss": 0.0875,
2139
  "step": 355
2140
  },
2141
  {
2142
  "epoch": 11.12,
2143
  "learning_rate": 4.488188976377953e-05,
2144
- "loss": 0.0553,
2145
  "step": 356
2146
  },
2147
  {
2148
  "epoch": 11.16,
2149
  "learning_rate": 4.47244094488189e-05,
2150
- "loss": 0.0571,
2151
  "step": 357
2152
  },
2153
  {
2154
  "epoch": 11.19,
2155
  "learning_rate": 4.456692913385827e-05,
2156
- "loss": 0.0684,
2157
  "step": 358
2158
  },
2159
  {
2160
  "epoch": 11.22,
2161
  "learning_rate": 4.4409448818897643e-05,
2162
- "loss": 0.0507,
2163
  "step": 359
2164
  },
2165
  {
2166
  "epoch": 11.25,
2167
  "learning_rate": 4.425196850393701e-05,
2168
- "loss": 0.0977,
2169
  "step": 360
2170
  },
2171
  {
2172
  "epoch": 11.28,
2173
  "learning_rate": 4.409448818897638e-05,
2174
- "loss": 0.0627,
2175
  "step": 361
2176
  },
2177
  {
2178
  "epoch": 11.31,
2179
  "learning_rate": 4.393700787401575e-05,
2180
- "loss": 0.0437,
2181
  "step": 362
2182
  },
2183
  {
2184
  "epoch": 11.34,
2185
  "learning_rate": 4.3779527559055125e-05,
2186
- "loss": 0.0688,
2187
  "step": 363
2188
  },
2189
  {
2190
  "epoch": 11.38,
2191
  "learning_rate": 4.362204724409449e-05,
2192
- "loss": 0.1032,
2193
  "step": 364
2194
  },
2195
  {
2196
  "epoch": 11.41,
2197
  "learning_rate": 4.346456692913386e-05,
2198
- "loss": 0.0472,
2199
  "step": 365
2200
  },
2201
  {
2202
  "epoch": 11.44,
2203
  "learning_rate": 4.330708661417323e-05,
2204
- "loss": 0.0835,
2205
  "step": 366
2206
  },
2207
  {
2208
  "epoch": 11.47,
2209
  "learning_rate": 4.31496062992126e-05,
2210
- "loss": 0.0902,
2211
  "step": 367
2212
  },
2213
  {
2214
  "epoch": 11.5,
2215
  "learning_rate": 4.2992125984251966e-05,
2216
- "loss": 0.1016,
2217
  "step": 368
2218
  },
2219
  {
2220
  "epoch": 11.53,
2221
  "learning_rate": 4.283464566929134e-05,
2222
- "loss": 0.0461,
2223
  "step": 369
2224
  },
2225
  {
2226
  "epoch": 11.56,
2227
  "learning_rate": 4.267716535433071e-05,
2228
- "loss": 0.0594,
2229
  "step": 370
2230
  },
2231
  {
2232
  "epoch": 11.59,
2233
  "learning_rate": 4.251968503937008e-05,
2234
- "loss": 0.054,
2235
  "step": 371
2236
  },
2237
  {
2238
  "epoch": 11.62,
2239
  "learning_rate": 4.236220472440945e-05,
2240
- "loss": 0.0902,
2241
  "step": 372
2242
  },
2243
  {
2244
  "epoch": 11.66,
2245
  "learning_rate": 4.220472440944882e-05,
2246
- "loss": 0.0645,
2247
  "step": 373
2248
  },
2249
  {
2250
  "epoch": 11.69,
2251
  "learning_rate": 4.204724409448819e-05,
2252
- "loss": 0.0417,
2253
  "step": 374
2254
  },
2255
  {
2256
  "epoch": 11.72,
2257
  "learning_rate": 4.1889763779527564e-05,
2258
- "loss": 0.0784,
2259
  "step": 375
2260
  },
2261
  {
2262
  "epoch": 11.75,
2263
  "learning_rate": 4.173228346456693e-05,
2264
- "loss": 0.0825,
2265
  "step": 376
2266
  },
2267
  {
2268
  "epoch": 11.78,
2269
  "learning_rate": 4.15748031496063e-05,
2270
- "loss": 0.1275,
2271
  "step": 377
2272
  },
2273
  {
2274
  "epoch": 11.81,
2275
  "learning_rate": 4.1417322834645674e-05,
2276
- "loss": 0.0802,
2277
  "step": 378
2278
  },
2279
  {
2280
  "epoch": 11.84,
2281
  "learning_rate": 4.1259842519685046e-05,
2282
- "loss": 0.0518,
2283
  "step": 379
2284
  },
2285
  {
2286
  "epoch": 11.88,
2287
  "learning_rate": 4.110236220472441e-05,
2288
- "loss": 0.0704,
2289
  "step": 380
2290
  },
2291
  {
2292
  "epoch": 11.91,
2293
  "learning_rate": 4.0944881889763784e-05,
2294
- "loss": 0.0865,
2295
  "step": 381
2296
  },
2297
  {
2298
  "epoch": 11.94,
2299
  "learning_rate": 4.078740157480315e-05,
2300
- "loss": 0.1193,
2301
  "step": 382
2302
  },
2303
  {
2304
  "epoch": 11.97,
2305
  "learning_rate": 4.062992125984252e-05,
2306
- "loss": 0.0598,
2307
  "step": 383
2308
  },
2309
  {
2310
  "epoch": 12.0,
2311
  "learning_rate": 4.047244094488189e-05,
2312
- "loss": 0.0559,
2313
  "step": 384
2314
  },
2315
  {
2316
  "epoch": 12.03,
2317
  "learning_rate": 4.031496062992126e-05,
2318
- "loss": 0.0717,
2319
  "step": 385
2320
  },
2321
  {
2322
  "epoch": 12.06,
2323
  "learning_rate": 4.015748031496063e-05,
2324
- "loss": 0.0506,
2325
  "step": 386
2326
  },
2327
  {
2328
  "epoch": 12.09,
2329
  "learning_rate": 4e-05,
2330
- "loss": 0.0957,
2331
  "step": 387
2332
  },
2333
  {
2334
  "epoch": 12.12,
2335
  "learning_rate": 3.984251968503937e-05,
2336
- "loss": 0.0483,
2337
  "step": 388
2338
  },
2339
  {
2340
  "epoch": 12.16,
2341
  "learning_rate": 3.968503937007874e-05,
2342
- "loss": 0.04,
2343
  "step": 389
2344
  },
2345
  {
2346
  "epoch": 12.19,
2347
  "learning_rate": 3.952755905511811e-05,
2348
- "loss": 0.081,
2349
  "step": 390
2350
  },
2351
  {
2352
  "epoch": 12.22,
2353
  "learning_rate": 3.9370078740157485e-05,
2354
- "loss": 0.0772,
2355
  "step": 391
2356
  },
2357
  {
2358
  "epoch": 12.25,
2359
  "learning_rate": 3.921259842519685e-05,
2360
- "loss": 0.0854,
2361
  "step": 392
2362
  },
2363
  {
2364
  "epoch": 12.28,
2365
  "learning_rate": 3.905511811023622e-05,
2366
- "loss": 0.0503,
2367
  "step": 393
2368
  },
2369
  {
2370
  "epoch": 12.31,
2371
  "learning_rate": 3.8897637795275595e-05,
2372
- "loss": 0.0645,
2373
  "step": 394
2374
  },
2375
  {
2376
  "epoch": 12.34,
2377
  "learning_rate": 3.874015748031497e-05,
2378
- "loss": 0.0711,
2379
  "step": 395
2380
  },
2381
  {
2382
  "epoch": 12.38,
2383
  "learning_rate": 3.858267716535433e-05,
2384
- "loss": 0.1467,
2385
  "step": 396
2386
  },
2387
  {
2388
  "epoch": 12.41,
2389
  "learning_rate": 3.8425196850393705e-05,
2390
- "loss": 0.1282,
2391
  "step": 397
2392
  },
2393
  {
2394
  "epoch": 12.44,
2395
  "learning_rate": 3.826771653543308e-05,
2396
- "loss": 0.0529,
2397
  "step": 398
2398
  },
2399
  {
2400
  "epoch": 12.47,
2401
  "learning_rate": 3.811023622047244e-05,
2402
- "loss": 0.0708,
2403
  "step": 399
2404
  },
2405
  {
2406
  "epoch": 12.5,
2407
  "learning_rate": 3.795275590551181e-05,
2408
- "loss": 0.0641,
2409
  "step": 400
2410
  },
2411
  {
2412
  "epoch": 12.53,
2413
  "learning_rate": 3.779527559055118e-05,
2414
- "loss": 0.0501,
2415
  "step": 401
2416
  },
2417
  {
2418
  "epoch": 12.56,
2419
  "learning_rate": 3.763779527559055e-05,
2420
- "loss": 0.0597,
2421
  "step": 402
2422
  },
2423
  {
2424
  "epoch": 12.59,
2425
  "learning_rate": 3.7480314960629924e-05,
2426
- "loss": 0.0723,
2427
  "step": 403
2428
  },
2429
  {
2430
  "epoch": 12.62,
2431
  "learning_rate": 3.732283464566929e-05,
2432
- "loss": 0.0837,
2433
  "step": 404
2434
  },
2435
  {
2436
  "epoch": 12.66,
2437
  "learning_rate": 3.716535433070866e-05,
2438
- "loss": 0.0464,
2439
  "step": 405
2440
  },
2441
  {
2442
  "epoch": 12.69,
2443
  "learning_rate": 3.7007874015748034e-05,
2444
- "loss": 0.0895,
2445
  "step": 406
2446
  },
2447
  {
2448
  "epoch": 12.72,
2449
  "learning_rate": 3.6850393700787406e-05,
2450
- "loss": 0.0424,
2451
  "step": 407
2452
  },
2453
  {
2454
  "epoch": 12.75,
2455
  "learning_rate": 3.669291338582677e-05,
2456
- "loss": 0.1492,
2457
  "step": 408
2458
  },
2459
  {
2460
  "epoch": 12.78,
2461
  "learning_rate": 3.6535433070866144e-05,
2462
- "loss": 0.0488,
2463
  "step": 409
2464
  },
2465
  {
2466
  "epoch": 12.81,
2467
  "learning_rate": 3.6377952755905516e-05,
2468
- "loss": 0.0438,
2469
  "step": 410
2470
  },
2471
  {
2472
  "epoch": 12.84,
2473
  "learning_rate": 3.622047244094489e-05,
2474
- "loss": 0.0587,
2475
  "step": 411
2476
  },
2477
  {
2478
  "epoch": 12.88,
2479
  "learning_rate": 3.6062992125984253e-05,
2480
- "loss": 0.0739,
2481
  "step": 412
2482
  },
2483
  {
2484
  "epoch": 12.91,
2485
  "learning_rate": 3.5905511811023626e-05,
2486
- "loss": 0.0785,
2487
  "step": 413
2488
  },
2489
  {
2490
  "epoch": 12.94,
2491
  "learning_rate": 3.5748031496063e-05,
2492
- "loss": 0.0688,
2493
  "step": 414
2494
  },
2495
  {
2496
  "epoch": 12.97,
2497
  "learning_rate": 3.559055118110236e-05,
2498
- "loss": 0.0526,
2499
  "step": 415
2500
  },
2501
  {
2502
  "epoch": 13.0,
2503
  "learning_rate": 3.5433070866141735e-05,
2504
- "loss": 0.0496,
2505
  "step": 416
2506
  },
2507
  {
2508
  "epoch": 13.03,
2509
  "learning_rate": 3.52755905511811e-05,
2510
- "loss": 0.0547,
2511
  "step": 417
2512
  },
2513
  {
2514
  "epoch": 13.06,
2515
  "learning_rate": 3.511811023622047e-05,
2516
- "loss": 0.066,
2517
  "step": 418
2518
  },
2519
  {
2520
  "epoch": 13.09,
2521
  "learning_rate": 3.4960629921259845e-05,
2522
- "loss": 0.0431,
2523
  "step": 419
2524
  },
2525
  {
2526
  "epoch": 13.12,
2527
  "learning_rate": 3.480314960629921e-05,
2528
- "loss": 0.0666,
2529
  "step": 420
2530
  },
2531
  {
2532
  "epoch": 13.16,
2533
  "learning_rate": 3.464566929133858e-05,
2534
- "loss": 0.0427,
2535
  "step": 421
2536
  },
2537
  {
2538
  "epoch": 13.19,
2539
  "learning_rate": 3.4488188976377955e-05,
2540
- "loss": 0.0458,
2541
  "step": 422
2542
  },
2543
  {
2544
  "epoch": 13.22,
2545
  "learning_rate": 3.433070866141733e-05,
2546
- "loss": 0.1239,
2547
  "step": 423
2548
  },
2549
  {
2550
  "epoch": 13.25,
2551
  "learning_rate": 3.417322834645669e-05,
2552
- "loss": 0.0833,
2553
  "step": 424
2554
  },
2555
  {
2556
  "epoch": 13.28,
2557
  "learning_rate": 3.4015748031496065e-05,
2558
- "loss": 0.1358,
2559
  "step": 425
2560
  },
2561
  {
2562
  "epoch": 13.31,
2563
  "learning_rate": 3.385826771653544e-05,
2564
- "loss": 0.0831,
2565
  "step": 426
2566
  },
2567
  {
2568
  "epoch": 13.34,
2569
  "learning_rate": 3.370078740157481e-05,
2570
- "loss": 0.0452,
2571
  "step": 427
2572
  },
2573
  {
2574
  "epoch": 13.38,
2575
  "learning_rate": 3.3543307086614174e-05,
2576
- "loss": 0.0648,
2577
  "step": 428
2578
  },
2579
  {
2580
  "epoch": 13.41,
2581
  "learning_rate": 3.3385826771653546e-05,
2582
- "loss": 0.0526,
2583
  "step": 429
2584
  },
2585
  {
2586
  "epoch": 13.44,
2587
  "learning_rate": 3.322834645669292e-05,
2588
- "loss": 0.0701,
2589
  "step": 430
2590
  },
2591
  {
2592
  "epoch": 13.47,
2593
  "learning_rate": 3.3070866141732284e-05,
2594
- "loss": 0.1528,
2595
  "step": 431
2596
  },
2597
  {
2598
  "epoch": 13.5,
2599
  "learning_rate": 3.2913385826771656e-05,
2600
- "loss": 0.0529,
2601
  "step": 432
2602
  },
2603
  {
2604
  "epoch": 13.53,
2605
  "learning_rate": 3.275590551181102e-05,
2606
- "loss": 0.0458,
2607
  "step": 433
2608
  },
2609
  {
2610
  "epoch": 13.56,
2611
  "learning_rate": 3.2598425196850394e-05,
2612
- "loss": 0.0489,
2613
  "step": 434
2614
  },
2615
  {
2616
  "epoch": 13.59,
2617
  "learning_rate": 3.2440944881889766e-05,
2618
- "loss": 0.0672,
2619
  "step": 435
2620
  },
2621
  {
2622
  "epoch": 13.62,
2623
  "learning_rate": 3.228346456692913e-05,
2624
- "loss": 0.0503,
2625
  "step": 436
2626
  },
2627
  {
2628
  "epoch": 13.66,
2629
  "learning_rate": 3.2125984251968504e-05,
2630
- "loss": 0.0447,
2631
  "step": 437
2632
  },
2633
  {
2634
  "epoch": 13.69,
2635
  "learning_rate": 3.1968503937007876e-05,
2636
- "loss": 0.0524,
2637
  "step": 438
2638
  },
2639
  {
2640
  "epoch": 13.72,
2641
  "learning_rate": 3.181102362204725e-05,
2642
- "loss": 0.1048,
2643
  "step": 439
2644
  },
2645
  {
2646
  "epoch": 13.75,
2647
  "learning_rate": 3.165354330708661e-05,
2648
- "loss": 0.062,
2649
  "step": 440
2650
  },
2651
  {
2652
  "epoch": 13.78,
2653
  "learning_rate": 3.1496062992125985e-05,
2654
- "loss": 0.0785,
2655
  "step": 441
2656
  },
2657
  {
2658
  "epoch": 13.81,
2659
  "learning_rate": 3.133858267716536e-05,
2660
- "loss": 0.0626,
2661
  "step": 442
2662
  },
2663
  {
2664
  "epoch": 13.84,
2665
  "learning_rate": 3.118110236220473e-05,
2666
- "loss": 0.0703,
2667
  "step": 443
2668
  },
2669
  {
2670
  "epoch": 13.88,
2671
  "learning_rate": 3.1023622047244095e-05,
2672
- "loss": 0.0868,
2673
  "step": 444
2674
  },
2675
  {
2676
  "epoch": 13.91,
2677
  "learning_rate": 3.086614173228347e-05,
2678
- "loss": 0.074,
2679
  "step": 445
2680
  },
2681
  {
2682
  "epoch": 13.94,
2683
  "learning_rate": 3.070866141732284e-05,
2684
- "loss": 0.0459,
2685
  "step": 446
2686
  },
2687
  {
2688
  "epoch": 13.97,
2689
  "learning_rate": 3.055118110236221e-05,
2690
- "loss": 0.0457,
2691
  "step": 447
2692
  },
2693
  {
2694
  "epoch": 14.0,
2695
  "learning_rate": 3.0393700787401574e-05,
2696
- "loss": 0.0653,
2697
  "step": 448
2698
  },
2699
  {
2700
  "epoch": 14.03,
2701
  "learning_rate": 3.0236220472440946e-05,
2702
- "loss": 0.0703,
2703
  "step": 449
2704
  },
2705
  {
2706
  "epoch": 14.06,
2707
  "learning_rate": 3.0078740157480318e-05,
2708
- "loss": 0.046,
2709
  "step": 450
2710
  },
2711
  {
2712
  "epoch": 14.09,
2713
  "learning_rate": 2.992125984251969e-05,
2714
- "loss": 0.0864,
2715
  "step": 451
2716
  },
2717
  {
2718
  "epoch": 14.12,
2719
  "learning_rate": 2.9763779527559056e-05,
2720
- "loss": 0.0478,
2721
  "step": 452
2722
  },
2723
  {
2724
  "epoch": 14.16,
2725
  "learning_rate": 2.9606299212598428e-05,
2726
- "loss": 0.0673,
2727
  "step": 453
2728
  },
2729
  {
2730
  "epoch": 14.19,
2731
  "learning_rate": 2.9448818897637797e-05,
2732
- "loss": 0.0683,
2733
  "step": 454
2734
  },
2735
  {
2736
  "epoch": 14.22,
2737
  "learning_rate": 2.929133858267717e-05,
2738
- "loss": 0.0485,
2739
  "step": 455
2740
  },
2741
  {
2742
  "epoch": 14.25,
2743
  "learning_rate": 2.9133858267716534e-05,
2744
- "loss": 0.0434,
2745
  "step": 456
2746
  },
2747
  {
2748
  "epoch": 14.28,
2749
  "learning_rate": 2.8976377952755906e-05,
2750
- "loss": 0.0426,
2751
  "step": 457
2752
  },
2753
  {
2754
  "epoch": 14.31,
2755
  "learning_rate": 2.881889763779528e-05,
2756
- "loss": 0.0403,
2757
  "step": 458
2758
  },
2759
  {
2760
  "epoch": 14.34,
2761
  "learning_rate": 2.866141732283465e-05,
2762
- "loss": 0.0476,
2763
  "step": 459
2764
  },
2765
  {
2766
  "epoch": 14.38,
2767
  "learning_rate": 2.8503937007874016e-05,
2768
- "loss": 0.0504,
2769
  "step": 460
2770
  },
2771
  {
2772
  "epoch": 14.41,
2773
  "learning_rate": 2.8346456692913388e-05,
2774
- "loss": 0.0691,
2775
  "step": 461
2776
  },
2777
  {
2778
  "epoch": 14.44,
2779
  "learning_rate": 2.8188976377952757e-05,
2780
- "loss": 0.1004,
2781
  "step": 462
2782
  },
2783
  {
2784
  "epoch": 14.47,
2785
  "learning_rate": 2.803149606299213e-05,
2786
- "loss": 0.0428,
2787
  "step": 463
2788
  },
2789
  {
2790
  "epoch": 14.5,
2791
  "learning_rate": 2.7874015748031495e-05,
2792
- "loss": 0.1126,
2793
  "step": 464
2794
  },
2795
  {
2796
  "epoch": 14.53,
2797
  "learning_rate": 2.7716535433070867e-05,
2798
- "loss": 0.0415,
2799
  "step": 465
2800
  },
2801
  {
2802
  "epoch": 14.56,
2803
  "learning_rate": 2.755905511811024e-05,
2804
- "loss": 0.0581,
2805
  "step": 466
2806
  },
2807
  {
2808
  "epoch": 14.59,
2809
  "learning_rate": 2.740157480314961e-05,
2810
- "loss": 0.0651,
2811
  "step": 467
2812
  },
2813
  {
2814
  "epoch": 14.62,
2815
  "learning_rate": 2.7244094488188977e-05,
2816
- "loss": 0.0606,
2817
  "step": 468
2818
  },
2819
  {
2820
  "epoch": 14.66,
2821
  "learning_rate": 2.708661417322835e-05,
2822
- "loss": 0.0572,
2823
  "step": 469
2824
  },
2825
  {
2826
  "epoch": 14.69,
2827
  "learning_rate": 2.6929133858267717e-05,
2828
- "loss": 0.0617,
2829
  "step": 470
2830
  },
2831
  {
2832
  "epoch": 14.72,
2833
  "learning_rate": 2.677165354330709e-05,
2834
- "loss": 0.0493,
2835
  "step": 471
2836
  },
2837
  {
2838
  "epoch": 14.75,
2839
  "learning_rate": 2.6614173228346455e-05,
2840
- "loss": 0.0877,
2841
  "step": 472
2842
  },
2843
  {
2844
  "epoch": 14.78,
2845
  "learning_rate": 2.6456692913385827e-05,
2846
- "loss": 0.0506,
2847
  "step": 473
2848
  },
2849
  {
2850
  "epoch": 14.81,
2851
  "learning_rate": 2.62992125984252e-05,
2852
- "loss": 0.081,
2853
  "step": 474
2854
  },
2855
  {
2856
  "epoch": 14.84,
2857
  "learning_rate": 2.614173228346457e-05,
2858
- "loss": 0.0847,
2859
  "step": 475
2860
  },
2861
  {
2862
  "epoch": 14.88,
2863
  "learning_rate": 2.5984251968503937e-05,
2864
- "loss": 0.1073,
2865
  "step": 476
2866
  },
2867
  {
2868
  "epoch": 14.91,
2869
  "learning_rate": 2.582677165354331e-05,
2870
- "loss": 0.05,
2871
  "step": 477
2872
  },
2873
  {
2874
  "epoch": 14.94,
2875
  "learning_rate": 2.5669291338582678e-05,
2876
- "loss": 0.1165,
2877
  "step": 478
2878
  },
2879
  {
2880
  "epoch": 14.97,
2881
  "learning_rate": 2.551181102362205e-05,
2882
- "loss": 0.0653,
2883
  "step": 479
2884
  },
2885
  {
2886
  "epoch": 15.0,
2887
  "learning_rate": 2.5354330708661416e-05,
2888
- "loss": 0.0576,
2889
  "step": 480
2890
  },
2891
  {
2892
  "epoch": 15.03,
2893
  "learning_rate": 2.5196850393700788e-05,
2894
- "loss": 0.0384,
2895
  "step": 481
2896
  },
2897
  {
2898
  "epoch": 15.06,
2899
  "learning_rate": 2.503937007874016e-05,
2900
- "loss": 0.0547,
2901
  "step": 482
2902
  },
2903
  {
2904
  "epoch": 15.09,
2905
  "learning_rate": 2.488188976377953e-05,
2906
- "loss": 0.0667,
2907
  "step": 483
2908
  },
2909
  {
2910
  "epoch": 15.12,
2911
  "learning_rate": 2.47244094488189e-05,
2912
- "loss": 0.0549,
2913
  "step": 484
2914
  },
2915
  {
2916
  "epoch": 15.16,
2917
  "learning_rate": 2.456692913385827e-05,
2918
- "loss": 0.0548,
2919
  "step": 485
2920
  },
2921
  {
2922
  "epoch": 15.19,
2923
  "learning_rate": 2.440944881889764e-05,
2924
- "loss": 0.0579,
2925
  "step": 486
2926
  },
2927
  {
2928
  "epoch": 15.22,
2929
  "learning_rate": 2.4251968503937007e-05,
2930
- "loss": 0.0945,
2931
  "step": 487
2932
  },
2933
  {
2934
  "epoch": 15.25,
2935
  "learning_rate": 2.409448818897638e-05,
2936
- "loss": 0.0699,
2937
  "step": 488
2938
  },
2939
  {
2940
  "epoch": 15.28,
2941
  "learning_rate": 2.3937007874015748e-05,
2942
- "loss": 0.0478,
2943
  "step": 489
2944
  },
2945
  {
2946
  "epoch": 15.31,
2947
  "learning_rate": 2.377952755905512e-05,
2948
- "loss": 0.0598,
2949
  "step": 490
2950
  },
2951
  {
2952
  "epoch": 15.34,
2953
  "learning_rate": 2.362204724409449e-05,
2954
- "loss": 0.0868,
2955
  "step": 491
2956
  },
2957
  {
2958
  "epoch": 15.38,
2959
  "learning_rate": 2.346456692913386e-05,
2960
- "loss": 0.0581,
2961
  "step": 492
2962
  },
2963
  {
2964
  "epoch": 15.41,
2965
  "learning_rate": 2.330708661417323e-05,
2966
- "loss": 0.0458,
2967
  "step": 493
2968
  },
2969
  {
2970
  "epoch": 15.44,
2971
  "learning_rate": 2.3149606299212602e-05,
2972
- "loss": 0.0681,
2973
  "step": 494
2974
  },
2975
  {
2976
  "epoch": 15.47,
2977
  "learning_rate": 2.2992125984251968e-05,
2978
- "loss": 0.058,
2979
  "step": 495
2980
  },
2981
  {
2982
  "epoch": 15.5,
2983
  "learning_rate": 2.283464566929134e-05,
2984
- "loss": 0.0413,
2985
  "step": 496
2986
  },
2987
  {
2988
  "epoch": 15.53,
2989
  "learning_rate": 2.267716535433071e-05,
2990
- "loss": 0.0797,
2991
  "step": 497
2992
  },
2993
  {
2994
  "epoch": 15.56,
2995
  "learning_rate": 2.251968503937008e-05,
2996
- "loss": 0.046,
2997
  "step": 498
2998
  },
2999
  {
3000
  "epoch": 15.59,
3001
  "learning_rate": 2.236220472440945e-05,
3002
- "loss": 0.065,
3003
  "step": 499
3004
  },
3005
  {
3006
  "epoch": 15.62,
3007
  "learning_rate": 2.2204724409448822e-05,
3008
- "loss": 0.0744,
3009
  "step": 500
3010
  },
3011
  {
3012
  "epoch": 15.66,
3013
  "learning_rate": 2.204724409448819e-05,
3014
- "loss": 0.072,
3015
  "step": 501
3016
  },
3017
  {
3018
  "epoch": 15.69,
3019
  "learning_rate": 2.1889763779527563e-05,
3020
- "loss": 0.0556,
3021
  "step": 502
3022
  },
3023
  {
3024
  "epoch": 15.72,
3025
  "learning_rate": 2.173228346456693e-05,
3026
- "loss": 0.0534,
3027
  "step": 503
3028
  },
3029
  {
3030
  "epoch": 15.75,
3031
  "learning_rate": 2.15748031496063e-05,
3032
- "loss": 0.0343,
3033
  "step": 504
3034
  },
3035
  {
3036
  "epoch": 15.78,
3037
  "learning_rate": 2.141732283464567e-05,
3038
- "loss": 0.0635,
3039
  "step": 505
3040
  },
3041
  {
3042
  "epoch": 15.81,
3043
  "learning_rate": 2.125984251968504e-05,
3044
- "loss": 0.0859,
3045
  "step": 506
3046
  },
3047
  {
3048
  "epoch": 15.84,
3049
  "learning_rate": 2.110236220472441e-05,
3050
- "loss": 0.0642,
3051
  "step": 507
3052
  },
3053
  {
3054
  "epoch": 15.88,
3055
  "learning_rate": 2.0944881889763782e-05,
3056
- "loss": 0.0583,
3057
  "step": 508
3058
  },
3059
  {
3060
  "epoch": 15.91,
3061
  "learning_rate": 2.078740157480315e-05,
3062
- "loss": 0.0546,
3063
  "step": 509
3064
  },
3065
  {
3066
  "epoch": 15.94,
3067
  "learning_rate": 2.0629921259842523e-05,
3068
- "loss": 0.057,
3069
  "step": 510
3070
  },
3071
  {
3072
  "epoch": 15.97,
3073
  "learning_rate": 2.0472440944881892e-05,
3074
- "loss": 0.0907,
3075
  "step": 511
3076
  },
3077
  {
3078
  "epoch": 16.0,
3079
  "learning_rate": 2.031496062992126e-05,
3080
- "loss": 0.0563,
3081
  "step": 512
3082
  },
3083
  {
3084
  "epoch": 16.03,
3085
  "learning_rate": 2.015748031496063e-05,
3086
- "loss": 0.0561,
3087
  "step": 513
3088
  },
3089
  {
3090
  "epoch": 16.06,
3091
  "learning_rate": 2e-05,
3092
- "loss": 0.0479,
3093
  "step": 514
3094
  },
3095
  {
3096
  "epoch": 16.09,
3097
  "learning_rate": 1.984251968503937e-05,
3098
- "loss": 0.0645,
3099
  "step": 515
3100
  },
3101
  {
3102
  "epoch": 16.12,
3103
  "learning_rate": 1.9685039370078743e-05,
3104
- "loss": 0.0662,
3105
  "step": 516
3106
  },
3107
  {
3108
  "epoch": 16.16,
3109
  "learning_rate": 1.952755905511811e-05,
3110
- "loss": 0.0645,
3111
  "step": 517
3112
  },
3113
  {
3114
  "epoch": 16.19,
3115
  "learning_rate": 1.9370078740157484e-05,
3116
- "loss": 0.0394,
3117
  "step": 518
3118
  },
3119
  {
3120
  "epoch": 16.22,
3121
  "learning_rate": 1.9212598425196852e-05,
3122
- "loss": 0.0565,
3123
  "step": 519
3124
  },
3125
  {
3126
  "epoch": 16.25,
3127
  "learning_rate": 1.905511811023622e-05,
3128
- "loss": 0.0481,
3129
  "step": 520
3130
  },
3131
  {
3132
  "epoch": 16.28,
3133
  "learning_rate": 1.889763779527559e-05,
3134
- "loss": 0.0631,
3135
  "step": 521
3136
  },
3137
  {
3138
  "epoch": 16.31,
3139
  "learning_rate": 1.8740157480314962e-05,
3140
- "loss": 0.0654,
3141
  "step": 522
3142
  },
3143
  {
3144
  "epoch": 16.34,
3145
  "learning_rate": 1.858267716535433e-05,
3146
- "loss": 0.0496,
3147
  "step": 523
3148
  },
3149
  {
3150
  "epoch": 16.38,
3151
  "learning_rate": 1.8425196850393703e-05,
3152
- "loss": 0.0507,
3153
  "step": 524
3154
  },
3155
  {
3156
  "epoch": 16.41,
3157
  "learning_rate": 1.8267716535433072e-05,
3158
- "loss": 0.0363,
3159
  "step": 525
3160
  },
3161
  {
3162
  "epoch": 16.44,
3163
  "learning_rate": 1.8110236220472444e-05,
3164
- "loss": 0.0537,
3165
  "step": 526
3166
  },
3167
  {
3168
  "epoch": 16.47,
3169
  "learning_rate": 1.7952755905511813e-05,
3170
- "loss": 0.037,
3171
  "step": 527
3172
  },
3173
  {
3174
  "epoch": 16.5,
3175
  "learning_rate": 1.779527559055118e-05,
3176
- "loss": 0.059,
3177
  "step": 528
3178
  },
3179
  {
3180
  "epoch": 16.53,
3181
  "learning_rate": 1.763779527559055e-05,
3182
- "loss": 0.0739,
3183
  "step": 529
3184
  },
3185
  {
3186
  "epoch": 16.56,
3187
  "learning_rate": 1.7480314960629923e-05,
3188
- "loss": 0.0444,
3189
  "step": 530
3190
  },
3191
  {
3192
  "epoch": 16.59,
3193
  "learning_rate": 1.732283464566929e-05,
3194
- "loss": 0.0995,
3195
  "step": 531
3196
  },
3197
  {
3198
  "epoch": 16.62,
3199
  "learning_rate": 1.7165354330708663e-05,
3200
- "loss": 0.0488,
3201
  "step": 532
3202
  },
3203
  {
3204
  "epoch": 16.66,
3205
  "learning_rate": 1.7007874015748032e-05,
3206
- "loss": 0.0586,
3207
  "step": 533
3208
  },
3209
  {
3210
  "epoch": 16.69,
3211
  "learning_rate": 1.6850393700787404e-05,
3212
- "loss": 0.096,
3213
  "step": 534
3214
  },
3215
  {
3216
  "epoch": 16.72,
3217
  "learning_rate": 1.6692913385826773e-05,
3218
- "loss": 0.0592,
3219
  "step": 535
3220
  },
3221
  {
3222
  "epoch": 16.75,
3223
  "learning_rate": 1.6535433070866142e-05,
3224
- "loss": 0.0625,
3225
  "step": 536
3226
  },
3227
  {
3228
  "epoch": 16.78,
3229
  "learning_rate": 1.637795275590551e-05,
3230
- "loss": 0.0662,
3231
  "step": 537
3232
  },
3233
  {
3234
  "epoch": 16.81,
3235
  "learning_rate": 1.6220472440944883e-05,
3236
- "loss": 0.0498,
3237
  "step": 538
3238
  },
3239
  {
3240
  "epoch": 16.84,
3241
  "learning_rate": 1.6062992125984252e-05,
3242
- "loss": 0.0669,
3243
  "step": 539
3244
  },
3245
  {
3246
  "epoch": 16.88,
3247
  "learning_rate": 1.5905511811023624e-05,
3248
- "loss": 0.0774,
3249
  "step": 540
3250
  },
3251
  {
3252
  "epoch": 16.91,
3253
  "learning_rate": 1.5748031496062993e-05,
3254
- "loss": 0.0658,
3255
  "step": 541
3256
  },
3257
  {
3258
  "epoch": 16.94,
3259
  "learning_rate": 1.5590551181102365e-05,
3260
- "loss": 0.0419,
3261
  "step": 542
3262
  },
3263
  {
3264
  "epoch": 16.97,
3265
  "learning_rate": 1.5433070866141734e-05,
3266
- "loss": 0.0635,
3267
  "step": 543
3268
  },
3269
  {
3270
  "epoch": 17.0,
3271
  "learning_rate": 1.5275590551181106e-05,
3272
- "loss": 0.0827,
3273
  "step": 544
3274
  },
3275
  {
3276
  "epoch": 17.03,
3277
  "learning_rate": 1.5118110236220473e-05,
3278
- "loss": 0.0578,
3279
  "step": 545
3280
  },
3281
  {
3282
  "epoch": 17.06,
3283
  "learning_rate": 1.4960629921259845e-05,
3284
- "loss": 0.056,
3285
  "step": 546
3286
  },
3287
  {
3288
  "epoch": 17.09,
3289
  "learning_rate": 1.4803149606299214e-05,
3290
- "loss": 0.0444,
3291
  "step": 547
3292
  },
3293
  {
3294
  "epoch": 17.12,
3295
  "learning_rate": 1.4645669291338584e-05,
3296
- "loss": 0.0463,
3297
  "step": 548
3298
  },
3299
  {
3300
  "epoch": 17.16,
3301
  "learning_rate": 1.4488188976377953e-05,
3302
- "loss": 0.0872,
3303
  "step": 549
3304
  },
3305
  {
3306
  "epoch": 17.19,
3307
  "learning_rate": 1.4330708661417325e-05,
3308
- "loss": 0.03,
3309
  "step": 550
3310
  },
3311
  {
3312
  "epoch": 17.22,
3313
  "learning_rate": 1.4173228346456694e-05,
3314
- "loss": 0.05,
3315
  "step": 551
3316
  },
3317
  {
3318
  "epoch": 17.25,
3319
  "learning_rate": 1.4015748031496065e-05,
3320
- "loss": 0.0465,
3321
  "step": 552
3322
  },
3323
  {
3324
  "epoch": 17.28,
3325
  "learning_rate": 1.3858267716535433e-05,
3326
- "loss": 0.0801,
3327
  "step": 553
3328
  },
3329
  {
3330
  "epoch": 17.31,
3331
  "learning_rate": 1.3700787401574806e-05,
3332
- "loss": 0.0569,
3333
  "step": 554
3334
  },
3335
  {
3336
  "epoch": 17.34,
3337
  "learning_rate": 1.3543307086614174e-05,
3338
- "loss": 0.0571,
3339
  "step": 555
3340
  },
3341
  {
3342
  "epoch": 17.38,
3343
  "learning_rate": 1.3385826771653545e-05,
3344
- "loss": 0.0428,
3345
  "step": 556
3346
  },
3347
  {
3348
  "epoch": 17.41,
3349
  "learning_rate": 1.3228346456692914e-05,
3350
- "loss": 0.0454,
3351
  "step": 557
3352
  },
3353
  {
3354
  "epoch": 17.44,
3355
  "learning_rate": 1.3070866141732286e-05,
3356
- "loss": 0.0582,
3357
  "step": 558
3358
  },
3359
  {
3360
  "epoch": 17.47,
3361
  "learning_rate": 1.2913385826771655e-05,
3362
- "loss": 0.0775,
3363
  "step": 559
3364
  },
3365
  {
3366
  "epoch": 17.5,
3367
  "learning_rate": 1.2755905511811025e-05,
3368
- "loss": 0.0594,
3369
  "step": 560
3370
  },
3371
  {
3372
  "epoch": 17.53,
3373
  "learning_rate": 1.2598425196850394e-05,
3374
- "loss": 0.0387,
3375
  "step": 561
3376
  },
3377
  {
3378
  "epoch": 17.56,
3379
  "learning_rate": 1.2440944881889764e-05,
3380
- "loss": 0.074,
3381
  "step": 562
3382
  },
3383
  {
3384
  "epoch": 17.59,
3385
  "learning_rate": 1.2283464566929135e-05,
3386
- "loss": 0.06,
3387
  "step": 563
3388
  },
3389
  {
3390
  "epoch": 17.62,
3391
  "learning_rate": 1.2125984251968504e-05,
3392
- "loss": 0.0414,
3393
  "step": 564
3394
  },
3395
  {
3396
  "epoch": 17.66,
3397
  "learning_rate": 1.1968503937007874e-05,
3398
- "loss": 0.0665,
3399
  "step": 565
3400
  },
3401
  {
3402
  "epoch": 17.69,
3403
  "learning_rate": 1.1811023622047245e-05,
3404
- "loss": 0.0471,
3405
  "step": 566
3406
  },
3407
  {
3408
  "epoch": 17.72,
3409
  "learning_rate": 1.1653543307086615e-05,
3410
- "loss": 0.0648,
3411
  "step": 567
3412
  },
3413
  {
3414
  "epoch": 17.75,
3415
  "learning_rate": 1.1496062992125984e-05,
3416
- "loss": 0.1013,
3417
  "step": 568
3418
  },
3419
  {
3420
  "epoch": 17.78,
3421
  "learning_rate": 1.1338582677165354e-05,
3422
- "loss": 0.0966,
3423
  "step": 569
3424
  },
3425
  {
3426
  "epoch": 17.81,
3427
  "learning_rate": 1.1181102362204725e-05,
3428
- "loss": 0.0503,
3429
  "step": 570
3430
  },
3431
  {
3432
  "epoch": 17.84,
3433
  "learning_rate": 1.1023622047244095e-05,
3434
- "loss": 0.0754,
3435
  "step": 571
3436
  },
3437
  {
3438
  "epoch": 17.88,
3439
  "learning_rate": 1.0866141732283466e-05,
3440
- "loss": 0.0524,
3441
  "step": 572
3442
  },
3443
  {
3444
  "epoch": 17.91,
3445
  "learning_rate": 1.0708661417322835e-05,
3446
- "loss": 0.0478,
3447
  "step": 573
3448
  },
3449
  {
3450
  "epoch": 17.94,
3451
  "learning_rate": 1.0551181102362205e-05,
3452
- "loss": 0.065,
3453
  "step": 574
3454
  },
3455
  {
3456
  "epoch": 17.97,
3457
  "learning_rate": 1.0393700787401575e-05,
3458
- "loss": 0.0706,
3459
  "step": 575
3460
  },
3461
  {
3462
  "epoch": 18.0,
3463
  "learning_rate": 1.0236220472440946e-05,
3464
- "loss": 0.0774,
3465
  "step": 576
3466
  },
3467
  {
3468
  "epoch": 18.03,
3469
  "learning_rate": 1.0078740157480315e-05,
3470
- "loss": 0.0404,
3471
  "step": 577
3472
  },
3473
  {
3474
  "epoch": 18.06,
3475
  "learning_rate": 9.921259842519685e-06,
3476
- "loss": 0.0576,
3477
  "step": 578
3478
  },
3479
  {
3480
  "epoch": 18.09,
3481
  "learning_rate": 9.763779527559056e-06,
3482
- "loss": 0.0538,
3483
  "step": 579
3484
  },
3485
  {
3486
  "epoch": 18.12,
3487
  "learning_rate": 9.606299212598426e-06,
3488
- "loss": 0.0375,
3489
  "step": 580
3490
  },
3491
  {
3492
  "epoch": 18.16,
3493
  "learning_rate": 9.448818897637795e-06,
3494
- "loss": 0.0598,
3495
  "step": 581
3496
  },
3497
  {
3498
  "epoch": 18.19,
3499
  "learning_rate": 9.291338582677165e-06,
3500
- "loss": 0.0499,
3501
  "step": 582
3502
  },
3503
  {
3504
  "epoch": 18.22,
3505
  "learning_rate": 9.133858267716536e-06,
3506
- "loss": 0.089,
3507
  "step": 583
3508
  },
3509
  {
3510
  "epoch": 18.25,
3511
  "learning_rate": 8.976377952755906e-06,
3512
- "loss": 0.0617,
3513
  "step": 584
3514
  },
3515
  {
3516
  "epoch": 18.28,
3517
  "learning_rate": 8.818897637795275e-06,
3518
- "loss": 0.0477,
3519
  "step": 585
3520
  },
3521
  {
3522
  "epoch": 18.31,
3523
  "learning_rate": 8.661417322834646e-06,
3524
- "loss": 0.1235,
3525
  "step": 586
3526
  },
3527
  {
3528
  "epoch": 18.34,
3529
  "learning_rate": 8.503937007874016e-06,
3530
- "loss": 0.0346,
3531
  "step": 587
3532
  },
3533
  {
3534
  "epoch": 18.38,
3535
  "learning_rate": 8.346456692913387e-06,
3536
- "loss": 0.069,
3537
  "step": 588
3538
  },
3539
  {
3540
  "epoch": 18.41,
3541
  "learning_rate": 8.188976377952755e-06,
3542
- "loss": 0.0471,
3543
  "step": 589
3544
  },
3545
  {
3546
  "epoch": 18.44,
3547
  "learning_rate": 8.031496062992126e-06,
3548
- "loss": 0.0412,
3549
  "step": 590
3550
  },
3551
  {
3552
  "epoch": 18.47,
3553
  "learning_rate": 7.874015748031496e-06,
3554
- "loss": 0.1199,
3555
  "step": 591
3556
  },
3557
  {
3558
  "epoch": 18.5,
3559
  "learning_rate": 7.716535433070867e-06,
3560
- "loss": 0.0524,
3561
  "step": 592
3562
  },
3563
  {
3564
  "epoch": 18.53,
3565
  "learning_rate": 7.5590551181102365e-06,
3566
- "loss": 0.0411,
3567
  "step": 593
3568
  },
3569
  {
3570
  "epoch": 18.56,
3571
  "learning_rate": 7.401574803149607e-06,
3572
- "loss": 0.0552,
3573
  "step": 594
3574
  },
3575
  {
3576
  "epoch": 18.59,
3577
  "learning_rate": 7.244094488188977e-06,
3578
- "loss": 0.0759,
3579
  "step": 595
3580
  },
3581
  {
3582
  "epoch": 18.62,
3583
  "learning_rate": 7.086614173228347e-06,
3584
- "loss": 0.0627,
3585
  "step": 596
3586
  },
3587
  {
3588
  "epoch": 18.66,
3589
  "learning_rate": 6.929133858267717e-06,
3590
- "loss": 0.0487,
3591
  "step": 597
3592
  },
3593
  {
3594
  "epoch": 18.69,
3595
  "learning_rate": 6.771653543307087e-06,
3596
- "loss": 0.0669,
3597
  "step": 598
3598
  },
3599
  {
3600
  "epoch": 18.72,
3601
  "learning_rate": 6.614173228346457e-06,
3602
- "loss": 0.0599,
3603
  "step": 599
3604
  },
3605
  {
3606
  "epoch": 18.75,
3607
  "learning_rate": 6.456692913385827e-06,
3608
- "loss": 0.0631,
3609
  "step": 600
3610
  },
3611
  {
3612
  "epoch": 18.78,
3613
  "learning_rate": 6.299212598425197e-06,
3614
- "loss": 0.0605,
3615
  "step": 601
3616
  },
3617
  {
3618
  "epoch": 18.81,
3619
  "learning_rate": 6.141732283464567e-06,
3620
- "loss": 0.1267,
3621
  "step": 602
3622
  },
3623
  {
3624
  "epoch": 18.84,
3625
  "learning_rate": 5.984251968503937e-06,
3626
- "loss": 0.0362,
3627
  "step": 603
3628
  },
3629
  {
3630
  "epoch": 18.88,
3631
  "learning_rate": 5.8267716535433075e-06,
3632
- "loss": 0.0673,
3633
  "step": 604
3634
  },
3635
  {
3636
  "epoch": 18.91,
3637
  "learning_rate": 5.669291338582677e-06,
3638
- "loss": 0.0478,
3639
  "step": 605
3640
  },
3641
  {
3642
  "epoch": 18.94,
3643
  "learning_rate": 5.511811023622048e-06,
3644
- "loss": 0.0685,
3645
  "step": 606
3646
  },
3647
  {
3648
  "epoch": 18.97,
3649
  "learning_rate": 5.354330708661417e-06,
3650
- "loss": 0.0521,
3651
  "step": 607
3652
  },
3653
  {
3654
  "epoch": 19.0,
3655
  "learning_rate": 5.196850393700788e-06,
3656
- "loss": 0.058,
3657
  "step": 608
3658
  },
3659
  {
3660
  "epoch": 19.03,
3661
  "learning_rate": 5.039370078740157e-06,
3662
- "loss": 0.061,
3663
  "step": 609
3664
  },
3665
  {
3666
  "epoch": 19.06,
3667
  "learning_rate": 4.881889763779528e-06,
3668
- "loss": 0.1133,
3669
  "step": 610
3670
  },
3671
  {
3672
  "epoch": 19.09,
3673
  "learning_rate": 4.7244094488188975e-06,
3674
- "loss": 0.053,
3675
  "step": 611
3676
  },
3677
  {
3678
  "epoch": 19.12,
3679
  "learning_rate": 4.566929133858268e-06,
3680
- "loss": 0.0472,
3681
  "step": 612
3682
  },
3683
  {
3684
  "epoch": 19.16,
3685
  "learning_rate": 4.409448818897638e-06,
3686
- "loss": 0.0516,
3687
  "step": 613
3688
  },
3689
  {
3690
  "epoch": 19.19,
3691
  "learning_rate": 4.251968503937008e-06,
3692
- "loss": 0.0544,
3693
  "step": 614
3694
  },
3695
  {
3696
  "epoch": 19.22,
3697
  "learning_rate": 4.094488188976378e-06,
3698
- "loss": 0.0507,
3699
  "step": 615
3700
  },
3701
  {
3702
  "epoch": 19.25,
3703
  "learning_rate": 3.937007874015748e-06,
3704
- "loss": 0.045,
3705
  "step": 616
3706
  },
3707
  {
3708
  "epoch": 19.28,
3709
  "learning_rate": 3.7795275590551182e-06,
3710
- "loss": 0.0519,
3711
  "step": 617
3712
  },
3713
  {
3714
  "epoch": 19.31,
3715
  "learning_rate": 3.6220472440944883e-06,
3716
- "loss": 0.0701,
3717
  "step": 618
3718
  },
3719
  {
3720
  "epoch": 19.34,
3721
  "learning_rate": 3.4645669291338583e-06,
3722
- "loss": 0.0431,
3723
  "step": 619
3724
  },
3725
  {
3726
  "epoch": 19.38,
3727
  "learning_rate": 3.3070866141732284e-06,
3728
- "loss": 0.0552,
3729
  "step": 620
3730
  },
3731
  {
3732
  "epoch": 19.41,
3733
  "learning_rate": 3.1496062992125985e-06,
3734
- "loss": 0.0432,
3735
  "step": 621
3736
  },
3737
  {
3738
  "epoch": 19.44,
3739
  "learning_rate": 2.9921259842519685e-06,
3740
- "loss": 0.0522,
3741
  "step": 622
3742
  },
3743
  {
3744
  "epoch": 19.47,
3745
  "learning_rate": 2.8346456692913386e-06,
3746
- "loss": 0.0875,
3747
  "step": 623
3748
  },
3749
  {
3750
  "epoch": 19.5,
3751
  "learning_rate": 2.6771653543307086e-06,
3752
- "loss": 0.05,
3753
  "step": 624
3754
  },
3755
  {
3756
  "epoch": 19.53,
3757
  "learning_rate": 2.5196850393700787e-06,
3758
- "loss": 0.0519,
3759
  "step": 625
3760
  },
3761
  {
3762
  "epoch": 19.56,
3763
  "learning_rate": 2.3622047244094487e-06,
3764
- "loss": 0.0494,
3765
  "step": 626
3766
  },
3767
  {
3768
  "epoch": 19.59,
3769
  "learning_rate": 2.204724409448819e-06,
3770
- "loss": 0.0502,
3771
  "step": 627
3772
  },
3773
  {
3774
  "epoch": 19.62,
3775
  "learning_rate": 2.047244094488189e-06,
3776
- "loss": 0.0938,
3777
  "step": 628
3778
  },
3779
  {
3780
  "epoch": 19.66,
3781
  "learning_rate": 1.8897637795275591e-06,
3782
- "loss": 0.0515,
3783
  "step": 629
3784
  },
3785
  {
3786
  "epoch": 19.69,
3787
  "learning_rate": 1.7322834645669292e-06,
3788
- "loss": 0.0829,
3789
  "step": 630
3790
  },
3791
  {
3792
  "epoch": 19.72,
3793
  "learning_rate": 1.5748031496062992e-06,
3794
- "loss": 0.0483,
3795
  "step": 631
3796
  },
3797
  {
3798
  "epoch": 19.75,
3799
  "learning_rate": 1.4173228346456693e-06,
3800
- "loss": 0.0919,
3801
  "step": 632
3802
  },
3803
  {
3804
  "epoch": 19.78,
3805
  "learning_rate": 1.2598425196850393e-06,
3806
- "loss": 0.037,
3807
  "step": 633
3808
  },
3809
  {
3810
  "epoch": 19.81,
3811
  "learning_rate": 1.1023622047244094e-06,
3812
- "loss": 0.0556,
3813
  "step": 634
3814
  },
3815
  {
3816
  "epoch": 19.84,
3817
  "learning_rate": 9.448818897637796e-07,
3818
- "loss": 0.0525,
3819
  "step": 635
3820
  },
3821
  {
3822
  "epoch": 19.88,
3823
  "learning_rate": 7.874015748031496e-07,
3824
- "loss": 0.0479,
3825
  "step": 636
3826
  },
3827
  {
3828
  "epoch": 19.91,
3829
  "learning_rate": 6.299212598425197e-07,
3830
- "loss": 0.0469,
3831
  "step": 637
3832
  },
3833
  {
3834
  "epoch": 19.94,
3835
  "learning_rate": 4.724409448818898e-07,
3836
- "loss": 0.0396,
3837
  "step": 638
3838
  },
3839
  {
3840
  "epoch": 19.97,
3841
  "learning_rate": 3.1496062992125984e-07,
3842
- "loss": 0.0834,
3843
  "step": 639
3844
  },
3845
  {
3846
  "epoch": 20.0,
3847
  "learning_rate": 1.5748031496062992e-07,
3848
- "loss": 0.0539,
3849
  "step": 640
3850
  },
3851
  {
3852
  "epoch": 20.0,
3853
  "step": 640,
3854
- "total_flos": 2.981403046182912e+16,
3855
- "train_loss": 0.31634302906750233,
3856
- "train_runtime": 1044.5949,
3857
- "train_samples_per_second": 2.451,
3858
- "train_steps_per_second": 0.613
3859
  }
3860
  ],
3861
  "logging_steps": 1,
3862
  "max_steps": 640,
3863
  "num_train_epochs": 20,
3864
  "save_steps": 500,
3865
- "total_flos": 2.981403046182912e+16,
3866
  "trial_name": null,
3867
  "trial_params": null
3868
  }
 
11
  {
12
  "epoch": 0.03,
13
  "learning_rate": 2e-05,
14
+ "loss": 2.5069,
15
  "step": 1
16
  },
17
  {
18
  "epoch": 0.06,
19
  "learning_rate": 4e-05,
20
+ "loss": 2.1327,
21
  "step": 2
22
  },
23
  {
24
  "epoch": 0.09,
25
  "learning_rate": 6e-05,
26
+ "loss": 2.4537,
27
  "step": 3
28
  },
29
  {
30
  "epoch": 0.12,
31
  "learning_rate": 8e-05,
32
+ "loss": 2.3612,
33
  "step": 4
34
  },
35
  {
36
  "epoch": 0.16,
37
  "learning_rate": 0.0001,
38
+ "loss": 2.3904,
39
  "step": 5
40
  },
41
  {
42
  "epoch": 0.19,
43
  "learning_rate": 9.984251968503937e-05,
44
+ "loss": 2.0011,
45
  "step": 6
46
  },
47
  {
48
  "epoch": 0.22,
49
  "learning_rate": 9.968503937007875e-05,
50
+ "loss": 2.1121,
51
  "step": 7
52
  },
53
  {
54
  "epoch": 0.25,
55
  "learning_rate": 9.952755905511811e-05,
56
+ "loss": 1.918,
57
  "step": 8
58
  },
59
  {
60
  "epoch": 0.28,
61
  "learning_rate": 9.937007874015748e-05,
62
+ "loss": 1.8568,
63
  "step": 9
64
  },
65
  {
66
  "epoch": 0.31,
67
  "learning_rate": 9.921259842519686e-05,
68
+ "loss": 1.7663,
69
  "step": 10
70
  },
71
  {
72
  "epoch": 0.34,
73
  "learning_rate": 9.905511811023622e-05,
74
+ "loss": 1.6871,
75
  "step": 11
76
  },
77
  {
78
  "epoch": 0.38,
79
  "learning_rate": 9.88976377952756e-05,
80
+ "loss": 1.5877,
81
  "step": 12
82
  },
83
  {
84
  "epoch": 0.41,
85
  "learning_rate": 9.874015748031497e-05,
86
+ "loss": 1.897,
87
  "step": 13
88
  },
89
  {
90
  "epoch": 0.44,
91
+ "learning_rate": 9.874015748031497e-05,
92
+ "loss": 1.7996,
93
  "step": 14
94
  },
95
  {
96
  "epoch": 0.47,
97
+ "learning_rate": 9.858267716535433e-05,
98
+ "loss": 1.8437,
99
  "step": 15
100
  },
101
  {
102
  "epoch": 0.5,
103
+ "learning_rate": 9.842519685039371e-05,
104
+ "loss": 1.7606,
105
  "step": 16
106
  },
107
  {
108
  "epoch": 0.53,
109
+ "learning_rate": 9.826771653543308e-05,
110
+ "loss": 1.7194,
111
  "step": 17
112
  },
113
  {
114
  "epoch": 0.56,
115
+ "learning_rate": 9.811023622047244e-05,
116
+ "loss": 1.8213,
117
  "step": 18
118
  },
119
  {
120
  "epoch": 0.59,
121
+ "learning_rate": 9.795275590551182e-05,
122
+ "loss": 1.6995,
123
  "step": 19
124
  },
125
  {
126
  "epoch": 0.62,
127
  "learning_rate": 9.779527559055119e-05,
128
+ "loss": 1.5309,
129
  "step": 20
130
  },
131
  {
132
  "epoch": 0.66,
133
  "learning_rate": 9.763779527559055e-05,
134
+ "loss": 1.4977,
135
  "step": 21
136
  },
137
  {
138
  "epoch": 0.69,
139
  "learning_rate": 9.748031496062993e-05,
140
+ "loss": 1.6145,
141
  "step": 22
142
  },
143
  {
144
  "epoch": 0.72,
145
  "learning_rate": 9.73228346456693e-05,
146
+ "loss": 1.6686,
147
  "step": 23
148
  },
149
  {
150
  "epoch": 0.75,
151
  "learning_rate": 9.716535433070866e-05,
152
+ "loss": 1.3646,
153
  "step": 24
154
  },
155
  {
156
  "epoch": 0.78,
157
  "learning_rate": 9.700787401574803e-05,
158
+ "loss": 1.5323,
159
  "step": 25
160
  },
161
  {
162
  "epoch": 0.81,
163
  "learning_rate": 9.68503937007874e-05,
164
+ "loss": 1.4045,
165
  "step": 26
166
  },
167
  {
168
  "epoch": 0.84,
169
  "learning_rate": 9.669291338582677e-05,
170
+ "loss": 1.6301,
171
  "step": 27
172
  },
173
  {
174
  "epoch": 0.88,
175
  "learning_rate": 9.653543307086614e-05,
176
+ "loss": 1.4687,
177
  "step": 28
178
  },
179
  {
180
  "epoch": 0.91,
181
  "learning_rate": 9.637795275590552e-05,
182
+ "loss": 1.4465,
183
  "step": 29
184
  },
185
  {
186
  "epoch": 0.94,
187
  "learning_rate": 9.622047244094488e-05,
188
+ "loss": 1.3959,
189
  "step": 30
190
  },
191
  {
192
  "epoch": 0.97,
193
  "learning_rate": 9.606299212598425e-05,
194
+ "loss": 1.4754,
195
  "step": 31
196
  },
197
  {
198
  "epoch": 1.0,
199
  "learning_rate": 9.590551181102363e-05,
200
+ "loss": 1.445,
201
  "step": 32
202
  },
203
  {
204
  "epoch": 1.03,
205
  "learning_rate": 9.574803149606299e-05,
206
+ "loss": 1.4998,
207
  "step": 33
208
  },
209
  {
210
  "epoch": 1.06,
211
  "learning_rate": 9.559055118110236e-05,
212
+ "loss": 1.5282,
213
  "step": 34
214
  },
215
  {
216
  "epoch": 1.09,
217
  "learning_rate": 9.543307086614174e-05,
218
+ "loss": 1.1835,
219
  "step": 35
220
  },
221
  {
222
  "epoch": 1.12,
223
  "learning_rate": 9.52755905511811e-05,
224
+ "loss": 1.3461,
225
  "step": 36
226
  },
227
  {
228
  "epoch": 1.16,
229
  "learning_rate": 9.511811023622048e-05,
230
+ "loss": 1.2879,
231
  "step": 37
232
  },
233
  {
234
  "epoch": 1.19,
235
  "learning_rate": 9.496062992125985e-05,
236
+ "loss": 1.293,
237
  "step": 38
238
  },
239
  {
240
  "epoch": 1.22,
241
  "learning_rate": 9.480314960629921e-05,
242
+ "loss": 1.2846,
243
  "step": 39
244
  },
245
  {
246
  "epoch": 1.25,
247
  "learning_rate": 9.464566929133859e-05,
248
+ "loss": 1.3323,
249
  "step": 40
250
  },
251
  {
252
  "epoch": 1.28,
253
  "learning_rate": 9.448818897637796e-05,
254
+ "loss": 1.3537,
255
  "step": 41
256
  },
257
  {
258
  "epoch": 1.31,
259
  "learning_rate": 9.433070866141732e-05,
260
+ "loss": 1.3553,
261
  "step": 42
262
  },
263
  {
264
  "epoch": 1.34,
265
  "learning_rate": 9.41732283464567e-05,
266
+ "loss": 1.2661,
267
  "step": 43
268
  },
269
  {
270
  "epoch": 1.38,
271
  "learning_rate": 9.401574803149607e-05,
272
+ "loss": 1.345,
273
  "step": 44
274
  },
275
  {
276
  "epoch": 1.41,
277
  "learning_rate": 9.385826771653545e-05,
278
+ "loss": 1.2593,
279
  "step": 45
280
  },
281
  {
282
  "epoch": 1.44,
283
  "learning_rate": 9.370078740157481e-05,
284
+ "loss": 1.1177,
285
  "step": 46
286
  },
287
  {
288
  "epoch": 1.47,
289
  "learning_rate": 9.354330708661418e-05,
290
+ "loss": 1.2686,
291
  "step": 47
292
  },
293
  {
294
  "epoch": 1.5,
295
  "learning_rate": 9.338582677165355e-05,
296
+ "loss": 1.1689,
297
  "step": 48
298
  },
299
  {
300
  "epoch": 1.53,
301
  "learning_rate": 9.322834645669292e-05,
302
+ "loss": 1.145,
303
  "step": 49
304
  },
305
  {
306
  "epoch": 1.56,
307
  "learning_rate": 9.307086614173229e-05,
308
+ "loss": 1.5517,
309
  "step": 50
310
  },
311
  {
312
  "epoch": 1.59,
313
  "learning_rate": 9.291338582677166e-05,
314
+ "loss": 1.1943,
315
  "step": 51
316
  },
317
  {
318
  "epoch": 1.62,
319
  "learning_rate": 9.275590551181103e-05,
320
+ "loss": 1.2215,
321
  "step": 52
322
  },
323
  {
324
  "epoch": 1.66,
325
  "learning_rate": 9.259842519685041e-05,
326
+ "loss": 1.1956,
327
  "step": 53
328
  },
329
  {
330
  "epoch": 1.69,
331
  "learning_rate": 9.244094488188977e-05,
332
+ "loss": 1.1969,
333
  "step": 54
334
  },
335
  {
336
  "epoch": 1.72,
337
  "learning_rate": 9.228346456692914e-05,
338
+ "loss": 1.2588,
339
  "step": 55
340
  },
341
  {
342
  "epoch": 1.75,
343
  "learning_rate": 9.21259842519685e-05,
344
+ "loss": 1.2618,
345
  "step": 56
346
  },
347
  {
348
  "epoch": 1.78,
349
  "learning_rate": 9.196850393700787e-05,
350
+ "loss": 1.0742,
351
  "step": 57
352
  },
353
  {
354
  "epoch": 1.81,
355
  "learning_rate": 9.181102362204725e-05,
356
+ "loss": 1.2993,
357
  "step": 58
358
  },
359
  {
360
  "epoch": 1.84,
361
  "learning_rate": 9.165354330708661e-05,
362
+ "loss": 1.229,
363
  "step": 59
364
  },
365
  {
366
  "epoch": 1.88,
367
  "learning_rate": 9.149606299212598e-05,
368
+ "loss": 1.5104,
369
  "step": 60
370
  },
371
  {
372
  "epoch": 1.91,
373
  "learning_rate": 9.133858267716536e-05,
374
+ "loss": 1.34,
375
  "step": 61
376
  },
377
  {
378
  "epoch": 1.94,
379
  "learning_rate": 9.118110236220472e-05,
380
+ "loss": 1.3365,
381
  "step": 62
382
  },
383
  {
384
  "epoch": 1.97,
385
  "learning_rate": 9.102362204724409e-05,
386
+ "loss": 1.3477,
387
  "step": 63
388
  },
389
  {
390
  "epoch": 2.0,
391
  "learning_rate": 9.086614173228347e-05,
392
+ "loss": 1.1649,
393
  "step": 64
394
  },
395
  {
396
  "epoch": 2.03,
397
  "learning_rate": 9.070866141732283e-05,
398
+ "loss": 1.0887,
399
  "step": 65
400
  },
401
  {
402
  "epoch": 2.06,
403
  "learning_rate": 9.05511811023622e-05,
404
+ "loss": 1.0539,
405
  "step": 66
406
  },
407
  {
408
  "epoch": 2.09,
409
  "learning_rate": 9.039370078740158e-05,
410
+ "loss": 1.2224,
411
  "step": 67
412
  },
413
  {
414
  "epoch": 2.12,
415
  "learning_rate": 9.023622047244094e-05,
416
+ "loss": 0.7727,
417
  "step": 68
418
  },
419
  {
420
  "epoch": 2.16,
421
  "learning_rate": 9.007874015748032e-05,
422
+ "loss": 0.8996,
423
  "step": 69
424
  },
425
  {
426
  "epoch": 2.19,
427
  "learning_rate": 8.992125984251969e-05,
428
+ "loss": 0.8348,
429
  "step": 70
430
  },
431
  {
432
  "epoch": 2.22,
433
  "learning_rate": 8.976377952755905e-05,
434
+ "loss": 0.9522,
435
  "step": 71
436
  },
437
  {
438
  "epoch": 2.25,
439
  "learning_rate": 8.960629921259843e-05,
440
+ "loss": 0.9277,
441
  "step": 72
442
  },
443
  {
444
  "epoch": 2.28,
445
  "learning_rate": 8.94488188976378e-05,
446
+ "loss": 0.9566,
447
  "step": 73
448
  },
449
  {
450
  "epoch": 2.31,
451
  "learning_rate": 8.929133858267716e-05,
452
+ "loss": 0.9179,
453
  "step": 74
454
  },
455
  {
456
  "epoch": 2.34,
457
  "learning_rate": 8.913385826771654e-05,
458
+ "loss": 0.9606,
459
  "step": 75
460
  },
461
  {
462
  "epoch": 2.38,
463
  "learning_rate": 8.897637795275591e-05,
464
+ "loss": 0.8982,
465
  "step": 76
466
  },
467
  {
468
  "epoch": 2.41,
469
  "learning_rate": 8.881889763779529e-05,
470
+ "loss": 0.9102,
471
  "step": 77
472
  },
473
  {
474
  "epoch": 2.44,
475
  "learning_rate": 8.866141732283465e-05,
476
+ "loss": 0.8879,
477
  "step": 78
478
  },
479
  {
480
  "epoch": 2.47,
481
  "learning_rate": 8.850393700787402e-05,
482
+ "loss": 0.6698,
483
  "step": 79
484
  },
485
  {
486
  "epoch": 2.5,
487
  "learning_rate": 8.83464566929134e-05,
488
+ "loss": 0.8272,
489
  "step": 80
490
  },
491
  {
492
  "epoch": 2.53,
493
  "learning_rate": 8.818897637795276e-05,
494
+ "loss": 1.0421,
495
  "step": 81
496
  },
497
  {
498
  "epoch": 2.56,
499
  "learning_rate": 8.803149606299213e-05,
500
+ "loss": 0.9424,
501
  "step": 82
502
  },
503
  {
504
  "epoch": 2.59,
505
  "learning_rate": 8.78740157480315e-05,
506
+ "loss": 0.9943,
507
  "step": 83
508
  },
509
  {
510
  "epoch": 2.62,
511
  "learning_rate": 8.771653543307087e-05,
512
+ "loss": 1.1647,
513
  "step": 84
514
  },
515
  {
516
  "epoch": 2.66,
517
  "learning_rate": 8.755905511811025e-05,
518
+ "loss": 1.0718,
519
  "step": 85
520
  },
521
  {
522
  "epoch": 2.69,
523
  "learning_rate": 8.740157480314962e-05,
524
+ "loss": 0.9349,
525
  "step": 86
526
  },
527
  {
528
  "epoch": 2.72,
529
  "learning_rate": 8.724409448818898e-05,
530
+ "loss": 1.014,
531
  "step": 87
532
  },
533
  {
534
  "epoch": 2.75,
535
  "learning_rate": 8.708661417322835e-05,
536
+ "loss": 1.2487,
537
  "step": 88
538
  },
539
  {
540
  "epoch": 2.78,
541
  "learning_rate": 8.692913385826773e-05,
542
+ "loss": 0.8633,
543
  "step": 89
544
  },
545
  {
546
  "epoch": 2.81,
547
  "learning_rate": 8.677165354330709e-05,
548
+ "loss": 0.9087,
549
  "step": 90
550
  },
551
  {
552
  "epoch": 2.84,
553
  "learning_rate": 8.661417322834646e-05,
554
+ "loss": 1.1013,
555
  "step": 91
556
  },
557
  {
558
  "epoch": 2.88,
559
  "learning_rate": 8.645669291338582e-05,
560
+ "loss": 1.0405,
561
  "step": 92
562
  },
563
  {
564
  "epoch": 2.91,
565
  "learning_rate": 8.62992125984252e-05,
566
+ "loss": 0.7865,
567
  "step": 93
568
  },
569
  {
570
  "epoch": 2.94,
571
  "learning_rate": 8.614173228346457e-05,
572
+ "loss": 0.9032,
573
  "step": 94
574
  },
575
  {
576
  "epoch": 2.97,
577
  "learning_rate": 8.598425196850393e-05,
578
+ "loss": 0.9319,
579
  "step": 95
580
  },
581
  {
582
  "epoch": 3.0,
583
  "learning_rate": 8.582677165354331e-05,
584
+ "loss": 0.8459,
585
  "step": 96
586
  },
587
  {
588
  "epoch": 3.03,
589
  "learning_rate": 8.566929133858268e-05,
590
+ "loss": 0.5466,
591
  "step": 97
592
  },
593
  {
594
  "epoch": 3.06,
595
  "learning_rate": 8.551181102362204e-05,
596
+ "loss": 0.4219,
597
  "step": 98
598
  },
599
  {
600
  "epoch": 3.09,
601
  "learning_rate": 8.535433070866142e-05,
602
+ "loss": 0.7185,
603
  "step": 99
604
  },
605
  {
606
  "epoch": 3.12,
607
  "learning_rate": 8.519685039370079e-05,
608
+ "loss": 0.5795,
609
  "step": 100
610
  },
611
  {
612
  "epoch": 3.16,
613
  "learning_rate": 8.503937007874016e-05,
614
+ "loss": 0.5285,
615
  "step": 101
616
  },
617
  {
618
  "epoch": 3.19,
619
  "learning_rate": 8.488188976377953e-05,
620
+ "loss": 0.6236,
621
  "step": 102
622
  },
623
  {
624
  "epoch": 3.22,
625
  "learning_rate": 8.47244094488189e-05,
626
+ "loss": 0.6287,
627
  "step": 103
628
  },
629
  {
630
  "epoch": 3.25,
631
  "learning_rate": 8.456692913385827e-05,
632
+ "loss": 0.4723,
633
  "step": 104
634
  },
635
  {
636
  "epoch": 3.28,
637
  "learning_rate": 8.440944881889764e-05,
638
+ "loss": 0.4926,
639
  "step": 105
640
  },
641
  {
642
  "epoch": 3.31,
643
  "learning_rate": 8.4251968503937e-05,
644
+ "loss": 0.5282,
645
  "step": 106
646
  },
647
  {
648
  "epoch": 3.34,
649
  "learning_rate": 8.409448818897638e-05,
650
+ "loss": 0.4181,
651
  "step": 107
652
  },
653
  {
654
  "epoch": 3.38,
655
  "learning_rate": 8.393700787401575e-05,
656
+ "loss": 0.5563,
657
  "step": 108
658
  },
659
  {
660
  "epoch": 3.41,
661
  "learning_rate": 8.377952755905513e-05,
662
+ "loss": 0.3991,
663
  "step": 109
664
  },
665
  {
666
  "epoch": 3.44,
667
  "learning_rate": 8.36220472440945e-05,
668
+ "loss": 0.4067,
669
  "step": 110
670
  },
671
  {
672
  "epoch": 3.47,
673
  "learning_rate": 8.346456692913386e-05,
674
+ "loss": 0.538,
675
  "step": 111
676
  },
677
  {
678
  "epoch": 3.5,
679
  "learning_rate": 8.330708661417324e-05,
680
+ "loss": 0.5638,
681
  "step": 112
682
  },
683
  {
684
  "epoch": 3.53,
685
  "learning_rate": 8.31496062992126e-05,
686
+ "loss": 0.4859,
687
  "step": 113
688
  },
689
  {
690
  "epoch": 3.56,
691
  "learning_rate": 8.299212598425197e-05,
692
+ "loss": 0.4579,
693
  "step": 114
694
  },
695
  {
696
  "epoch": 3.59,
697
  "learning_rate": 8.283464566929135e-05,
698
+ "loss": 0.7117,
699
  "step": 115
700
  },
701
  {
702
  "epoch": 3.62,
703
  "learning_rate": 8.267716535433071e-05,
704
+ "loss": 0.5797,
705
  "step": 116
706
  },
707
  {
708
  "epoch": 3.66,
709
  "learning_rate": 8.251968503937009e-05,
710
+ "loss": 0.5797,
711
  "step": 117
712
  },
713
  {
714
  "epoch": 3.69,
715
  "learning_rate": 8.236220472440946e-05,
716
+ "loss": 0.5154,
717
  "step": 118
718
  },
719
  {
720
  "epoch": 3.72,
721
  "learning_rate": 8.220472440944882e-05,
722
+ "loss": 0.5505,
723
  "step": 119
724
  },
725
  {
726
  "epoch": 3.75,
727
  "learning_rate": 8.20472440944882e-05,
728
+ "loss": 0.6365,
729
  "step": 120
730
  },
731
  {
732
  "epoch": 3.78,
733
  "learning_rate": 8.188976377952757e-05,
734
+ "loss": 0.5842,
735
  "step": 121
736
  },
737
  {
738
  "epoch": 3.81,
739
  "learning_rate": 8.173228346456693e-05,
740
+ "loss": 0.6596,
741
  "step": 122
742
  },
743
  {
744
  "epoch": 3.84,
745
  "learning_rate": 8.15748031496063e-05,
746
+ "loss": 0.544,
747
  "step": 123
748
  },
749
  {
750
  "epoch": 3.88,
751
  "learning_rate": 8.141732283464568e-05,
752
+ "loss": 0.6828,
753
  "step": 124
754
  },
755
  {
756
  "epoch": 3.91,
757
  "learning_rate": 8.125984251968504e-05,
758
+ "loss": 0.5819,
759
  "step": 125
760
  },
761
  {
762
  "epoch": 3.94,
763
  "learning_rate": 8.110236220472441e-05,
764
+ "loss": 0.5688,
765
  "step": 126
766
  },
767
  {
768
  "epoch": 3.97,
769
  "learning_rate": 8.094488188976377e-05,
770
+ "loss": 0.5328,
771
  "step": 127
772
  },
773
  {
774
  "epoch": 4.0,
775
  "learning_rate": 8.078740157480315e-05,
776
+ "loss": 0.5887,
777
  "step": 128
778
  },
779
  {
780
  "epoch": 4.03,
781
  "learning_rate": 8.062992125984252e-05,
782
+ "loss": 0.3647,
783
  "step": 129
784
  },
785
  {
786
  "epoch": 4.06,
787
  "learning_rate": 8.047244094488188e-05,
788
+ "loss": 0.3172,
789
  "step": 130
790
  },
791
  {
792
  "epoch": 4.09,
793
  "learning_rate": 8.031496062992126e-05,
794
+ "loss": 0.3931,
795
  "step": 131
796
  },
797
  {
798
  "epoch": 4.12,
799
  "learning_rate": 8.015748031496063e-05,
800
+ "loss": 0.2472,
801
  "step": 132
802
  },
803
  {
804
  "epoch": 4.16,
805
  "learning_rate": 8e-05,
806
+ "loss": 0.1936,
807
  "step": 133
808
  },
809
  {
810
  "epoch": 4.19,
811
  "learning_rate": 7.984251968503937e-05,
812
+ "loss": 0.2755,
813
  "step": 134
814
  },
815
  {
816
  "epoch": 4.22,
817
  "learning_rate": 7.968503937007874e-05,
818
+ "loss": 0.3037,
819
  "step": 135
820
  },
821
  {
822
  "epoch": 4.25,
823
  "learning_rate": 7.952755905511812e-05,
824
+ "loss": 0.2445,
825
  "step": 136
826
  },
827
  {
828
  "epoch": 4.28,
829
  "learning_rate": 7.937007874015748e-05,
830
+ "loss": 0.1991,
831
  "step": 137
832
  },
833
  {
834
  "epoch": 4.31,
835
  "learning_rate": 7.921259842519685e-05,
836
+ "loss": 0.249,
837
  "step": 138
838
  },
839
  {
840
  "epoch": 4.34,
841
  "learning_rate": 7.905511811023623e-05,
842
+ "loss": 0.3154,
843
  "step": 139
844
  },
845
  {
846
  "epoch": 4.38,
847
  "learning_rate": 7.889763779527559e-05,
848
+ "loss": 0.218,
849
  "step": 140
850
  },
851
  {
852
  "epoch": 4.41,
853
  "learning_rate": 7.874015748031497e-05,
854
+ "loss": 0.3027,
855
  "step": 141
856
  },
857
  {
858
  "epoch": 4.44,
859
  "learning_rate": 7.858267716535434e-05,
860
+ "loss": 0.2984,
861
  "step": 142
862
  },
863
  {
864
  "epoch": 4.47,
865
  "learning_rate": 7.84251968503937e-05,
866
+ "loss": 0.2319,
867
  "step": 143
868
  },
869
  {
870
  "epoch": 4.5,
871
  "learning_rate": 7.826771653543308e-05,
872
+ "loss": 0.2731,
873
  "step": 144
874
  },
875
  {
876
  "epoch": 4.53,
877
  "learning_rate": 7.811023622047245e-05,
878
+ "loss": 0.2904,
879
  "step": 145
880
  },
881
  {
882
  "epoch": 4.56,
883
  "learning_rate": 7.795275590551181e-05,
884
+ "loss": 0.2438,
885
  "step": 146
886
  },
887
  {
888
  "epoch": 4.59,
889
  "learning_rate": 7.779527559055119e-05,
890
+ "loss": 0.3245,
891
  "step": 147
892
  },
893
  {
894
  "epoch": 4.62,
895
  "learning_rate": 7.763779527559056e-05,
896
+ "loss": 0.2738,
897
  "step": 148
898
  },
899
  {
900
  "epoch": 4.66,
901
  "learning_rate": 7.748031496062993e-05,
902
+ "loss": 0.2536,
903
  "step": 149
904
  },
905
  {
906
  "epoch": 4.69,
907
  "learning_rate": 7.73228346456693e-05,
908
+ "loss": 0.3255,
909
  "step": 150
910
  },
911
  {
912
  "epoch": 4.72,
913
  "learning_rate": 7.716535433070867e-05,
914
+ "loss": 0.2891,
915
  "step": 151
916
  },
917
  {
918
  "epoch": 4.75,
919
  "learning_rate": 7.700787401574804e-05,
920
+ "loss": 0.2648,
921
  "step": 152
922
  },
923
  {
924
  "epoch": 4.78,
925
  "learning_rate": 7.685039370078741e-05,
926
+ "loss": 0.2663,
927
  "step": 153
928
  },
929
  {
930
  "epoch": 4.81,
931
  "learning_rate": 7.669291338582677e-05,
932
+ "loss": 0.318,
933
  "step": 154
934
  },
935
  {
936
  "epoch": 4.84,
937
  "learning_rate": 7.653543307086615e-05,
938
+ "loss": 0.3181,
939
  "step": 155
940
  },
941
  {
942
  "epoch": 4.88,
943
  "learning_rate": 7.637795275590552e-05,
944
+ "loss": 0.2486,
945
  "step": 156
946
  },
947
  {
948
  "epoch": 4.91,
949
  "learning_rate": 7.622047244094488e-05,
950
+ "loss": 0.2911,
951
  "step": 157
952
  },
953
  {
954
  "epoch": 4.94,
955
  "learning_rate": 7.606299212598425e-05,
956
+ "loss": 0.3189,
957
  "step": 158
958
  },
959
  {
960
  "epoch": 4.97,
961
  "learning_rate": 7.590551181102362e-05,
962
+ "loss": 0.3104,
963
  "step": 159
964
  },
965
  {
966
  "epoch": 5.0,
967
  "learning_rate": 7.5748031496063e-05,
968
+ "loss": 0.2286,
969
  "step": 160
970
  },
971
  {
972
  "epoch": 5.03,
973
  "learning_rate": 7.559055118110236e-05,
974
+ "loss": 0.1934,
975
  "step": 161
976
  },
977
  {
978
  "epoch": 5.06,
979
  "learning_rate": 7.543307086614173e-05,
980
+ "loss": 0.1535,
981
  "step": 162
982
  },
983
  {
984
  "epoch": 5.09,
985
  "learning_rate": 7.52755905511811e-05,
986
+ "loss": 0.1791,
987
  "step": 163
988
  },
989
  {
990
  "epoch": 5.12,
991
  "learning_rate": 7.511811023622047e-05,
992
+ "loss": 0.144,
993
  "step": 164
994
  },
995
  {
996
  "epoch": 5.16,
997
  "learning_rate": 7.496062992125985e-05,
998
+ "loss": 0.1143,
999
  "step": 165
1000
  },
1001
  {
1002
  "epoch": 5.19,
1003
  "learning_rate": 7.480314960629921e-05,
1004
+ "loss": 0.1244,
1005
  "step": 166
1006
  },
1007
  {
1008
  "epoch": 5.22,
1009
  "learning_rate": 7.464566929133858e-05,
1010
+ "loss": 0.0934,
1011
  "step": 167
1012
  },
1013
  {
1014
  "epoch": 5.25,
1015
  "learning_rate": 7.448818897637796e-05,
1016
+ "loss": 0.1276,
1017
  "step": 168
1018
  },
1019
  {
1020
  "epoch": 5.28,
1021
  "learning_rate": 7.433070866141732e-05,
1022
+ "loss": 0.1416,
1023
  "step": 169
1024
  },
1025
  {
1026
  "epoch": 5.31,
1027
  "learning_rate": 7.417322834645669e-05,
1028
+ "loss": 0.1335,
1029
  "step": 170
1030
  },
1031
  {
1032
  "epoch": 5.34,
1033
  "learning_rate": 7.401574803149607e-05,
1034
+ "loss": 0.1368,
1035
  "step": 171
1036
  },
1037
  {
1038
  "epoch": 5.38,
1039
  "learning_rate": 7.385826771653543e-05,
1040
+ "loss": 0.1422,
1041
  "step": 172
1042
  },
1043
  {
1044
  "epoch": 5.41,
1045
  "learning_rate": 7.370078740157481e-05,
1046
+ "loss": 0.1306,
1047
  "step": 173
1048
  },
1049
  {
1050
  "epoch": 5.44,
1051
  "learning_rate": 7.354330708661418e-05,
1052
+ "loss": 0.1287,
1053
  "step": 174
1054
  },
1055
  {
1056
  "epoch": 5.47,
1057
  "learning_rate": 7.338582677165354e-05,
1058
+ "loss": 0.1026,
1059
  "step": 175
1060
  },
1061
  {
1062
  "epoch": 5.5,
1063
  "learning_rate": 7.322834645669292e-05,
1064
+ "loss": 0.1304,
1065
  "step": 176
1066
  },
1067
  {
1068
  "epoch": 5.53,
1069
  "learning_rate": 7.307086614173229e-05,
1070
+ "loss": 0.1497,
1071
  "step": 177
1072
  },
1073
  {
1074
  "epoch": 5.56,
1075
  "learning_rate": 7.291338582677165e-05,
1076
+ "loss": 0.1985,
1077
  "step": 178
1078
  },
1079
  {
1080
  "epoch": 5.59,
1081
  "learning_rate": 7.275590551181103e-05,
1082
+ "loss": 0.1248,
1083
  "step": 179
1084
  },
1085
  {
1086
  "epoch": 5.62,
1087
  "learning_rate": 7.25984251968504e-05,
1088
+ "loss": 0.1705,
1089
  "step": 180
1090
  },
1091
  {
1092
  "epoch": 5.66,
1093
  "learning_rate": 7.244094488188978e-05,
1094
+ "loss": 0.1435,
1095
  "step": 181
1096
  },
1097
  {
1098
  "epoch": 5.69,
1099
  "learning_rate": 7.228346456692914e-05,
1100
+ "loss": 0.1864,
1101
  "step": 182
1102
  },
1103
  {
1104
  "epoch": 5.72,
1105
  "learning_rate": 7.212598425196851e-05,
1106
+ "loss": 0.1399,
1107
  "step": 183
1108
  },
1109
  {
1110
  "epoch": 5.75,
1111
  "learning_rate": 7.196850393700789e-05,
1112
+ "loss": 0.1604,
1113
  "step": 184
1114
  },
1115
  {
1116
  "epoch": 5.78,
1117
  "learning_rate": 7.181102362204725e-05,
1118
+ "loss": 0.1391,
1119
  "step": 185
1120
  },
1121
  {
1122
  "epoch": 5.81,
1123
  "learning_rate": 7.165354330708662e-05,
1124
+ "loss": 0.1578,
1125
  "step": 186
1126
  },
1127
  {
1128
  "epoch": 5.84,
1129
  "learning_rate": 7.1496062992126e-05,
1130
+ "loss": 0.1467,
1131
  "step": 187
1132
  },
1133
  {
1134
  "epoch": 5.88,
1135
  "learning_rate": 7.133858267716536e-05,
1136
+ "loss": 0.1577,
1137
  "step": 188
1138
  },
1139
  {
1140
  "epoch": 5.91,
1141
  "learning_rate": 7.118110236220473e-05,
1142
+ "loss": 0.159,
1143
  "step": 189
1144
  },
1145
  {
1146
  "epoch": 5.94,
1147
  "learning_rate": 7.102362204724409e-05,
1148
+ "loss": 0.1589,
1149
  "step": 190
1150
  },
1151
  {
1152
  "epoch": 5.97,
1153
  "learning_rate": 7.086614173228347e-05,
1154
+ "loss": 0.1333,
1155
  "step": 191
1156
  },
1157
  {
1158
  "epoch": 6.0,
1159
  "learning_rate": 7.070866141732284e-05,
1160
+ "loss": 0.2101,
1161
  "step": 192
1162
  },
1163
  {
1164
  "epoch": 6.03,
1165
  "learning_rate": 7.05511811023622e-05,
1166
+ "loss": 0.0931,
1167
  "step": 193
1168
  },
1169
  {
1170
  "epoch": 6.06,
1171
  "learning_rate": 7.039370078740157e-05,
1172
+ "loss": 0.0615,
1173
  "step": 194
1174
  },
1175
  {
1176
  "epoch": 6.09,
1177
  "learning_rate": 7.023622047244095e-05,
1178
+ "loss": 0.0841,
1179
  "step": 195
1180
  },
1181
  {
1182
  "epoch": 6.12,
1183
  "learning_rate": 7.007874015748031e-05,
1184
+ "loss": 0.0833,
1185
  "step": 196
1186
  },
1187
  {
1188
  "epoch": 6.16,
1189
  "learning_rate": 6.992125984251969e-05,
1190
+ "loss": 0.1018,
1191
  "step": 197
1192
  },
1193
  {
1194
  "epoch": 6.19,
1195
  "learning_rate": 6.976377952755906e-05,
1196
+ "loss": 0.0827,
1197
  "step": 198
1198
  },
1199
  {
1200
  "epoch": 6.22,
1201
  "learning_rate": 6.960629921259842e-05,
1202
+ "loss": 0.0786,
1203
  "step": 199
1204
  },
1205
  {
1206
  "epoch": 6.25,
1207
  "learning_rate": 6.94488188976378e-05,
1208
+ "loss": 0.0741,
1209
  "step": 200
1210
  },
1211
  {
1212
  "epoch": 6.28,
1213
  "learning_rate": 6.929133858267717e-05,
1214
+ "loss": 0.0911,
1215
  "step": 201
1216
  },
1217
  {
1218
  "epoch": 6.31,
1219
  "learning_rate": 6.913385826771653e-05,
1220
+ "loss": 0.1057,
1221
  "step": 202
1222
  },
1223
  {
1224
  "epoch": 6.34,
1225
  "learning_rate": 6.897637795275591e-05,
1226
+ "loss": 0.0598,
1227
  "step": 203
1228
  },
1229
  {
1230
  "epoch": 6.38,
1231
  "learning_rate": 6.881889763779528e-05,
1232
+ "loss": 0.1423,
1233
  "step": 204
1234
  },
1235
  {
1236
  "epoch": 6.41,
1237
  "learning_rate": 6.866141732283465e-05,
1238
+ "loss": 0.0843,
1239
  "step": 205
1240
  },
1241
  {
1242
  "epoch": 6.44,
1243
  "learning_rate": 6.850393700787402e-05,
1244
+ "loss": 0.0664,
1245
  "step": 206
1246
  },
1247
  {
1248
  "epoch": 6.47,
1249
  "learning_rate": 6.834645669291338e-05,
1250
+ "loss": 0.0685,
1251
  "step": 207
1252
  },
1253
  {
 
1259
  {
1260
  "epoch": 6.53,
1261
  "learning_rate": 6.803149606299213e-05,
1262
+ "loss": 0.1523,
1263
  "step": 209
1264
  },
1265
  {
1266
  "epoch": 6.56,
1267
  "learning_rate": 6.78740157480315e-05,
1268
+ "loss": 0.0851,
1269
  "step": 210
1270
  },
1271
  {
1272
  "epoch": 6.59,
1273
  "learning_rate": 6.771653543307087e-05,
1274
+ "loss": 0.1009,
1275
  "step": 211
1276
  },
1277
  {
1278
  "epoch": 6.62,
1279
  "learning_rate": 6.755905511811024e-05,
1280
+ "loss": 0.1057,
1281
  "step": 212
1282
  },
1283
  {
1284
  "epoch": 6.66,
1285
  "learning_rate": 6.740157480314962e-05,
1286
+ "loss": 0.0816,
1287
  "step": 213
1288
  },
1289
  {
1290
  "epoch": 6.69,
1291
  "learning_rate": 6.724409448818898e-05,
1292
+ "loss": 0.1062,
1293
  "step": 214
1294
  },
1295
  {
1296
  "epoch": 6.72,
1297
  "learning_rate": 6.708661417322835e-05,
1298
+ "loss": 0.1022,
1299
  "step": 215
1300
  },
1301
  {
1302
  "epoch": 6.75,
1303
  "learning_rate": 6.692913385826773e-05,
1304
+ "loss": 0.1159,
1305
  "step": 216
1306
  },
1307
  {
1308
  "epoch": 6.78,
1309
  "learning_rate": 6.677165354330709e-05,
1310
+ "loss": 0.0964,
1311
  "step": 217
1312
  },
1313
  {
1314
  "epoch": 6.81,
1315
  "learning_rate": 6.661417322834646e-05,
1316
+ "loss": 0.1399,
1317
  "step": 218
1318
  },
1319
  {
1320
  "epoch": 6.84,
1321
  "learning_rate": 6.645669291338584e-05,
1322
+ "loss": 0.1076,
1323
  "step": 219
1324
  },
1325
  {
1326
  "epoch": 6.88,
1327
  "learning_rate": 6.62992125984252e-05,
1328
+ "loss": 0.1024,
1329
  "step": 220
1330
  },
1331
  {
1332
  "epoch": 6.91,
1333
  "learning_rate": 6.614173228346457e-05,
1334
+ "loss": 0.1096,
1335
  "step": 221
1336
  },
1337
  {
1338
  "epoch": 6.94,
1339
  "learning_rate": 6.598425196850395e-05,
1340
+ "loss": 0.1007,
1341
  "step": 222
1342
  },
1343
  {
1344
  "epoch": 6.97,
1345
  "learning_rate": 6.582677165354331e-05,
1346
+ "loss": 0.1329,
1347
  "step": 223
1348
  },
1349
  {
1350
  "epoch": 7.0,
1351
  "learning_rate": 6.566929133858268e-05,
1352
+ "loss": 0.0809,
1353
  "step": 224
1354
  },
1355
  {
1356
  "epoch": 7.03,
1357
  "learning_rate": 6.551181102362204e-05,
1358
+ "loss": 0.0602,
1359
  "step": 225
1360
  },
1361
  {
1362
  "epoch": 7.06,
1363
  "learning_rate": 6.535433070866141e-05,
1364
+ "loss": 0.0753,
1365
  "step": 226
1366
  },
1367
  {
1368
  "epoch": 7.09,
1369
  "learning_rate": 6.519685039370079e-05,
1370
+ "loss": 0.0541,
1371
  "step": 227
1372
  },
1373
  {
1374
  "epoch": 7.12,
1375
  "learning_rate": 6.503937007874015e-05,
1376
+ "loss": 0.068,
1377
  "step": 228
1378
  },
1379
  {
1380
  "epoch": 7.16,
1381
  "learning_rate": 6.488188976377953e-05,
1382
+ "loss": 0.0741,
1383
  "step": 229
1384
  },
1385
  {
1386
  "epoch": 7.19,
1387
  "learning_rate": 6.47244094488189e-05,
1388
+ "loss": 0.0823,
1389
  "step": 230
1390
  },
1391
  {
1392
  "epoch": 7.22,
1393
  "learning_rate": 6.456692913385826e-05,
1394
+ "loss": 0.068,
1395
  "step": 231
1396
  },
1397
  {
1398
  "epoch": 7.25,
1399
  "learning_rate": 6.440944881889764e-05,
1400
+ "loss": 0.0605,
1401
  "step": 232
1402
  },
1403
  {
1404
  "epoch": 7.28,
1405
  "learning_rate": 6.425196850393701e-05,
1406
+ "loss": 0.0493,
1407
  "step": 233
1408
  },
1409
  {
1410
  "epoch": 7.31,
1411
  "learning_rate": 6.409448818897637e-05,
1412
+ "loss": 0.0728,
1413
  "step": 234
1414
  },
1415
  {
1416
  "epoch": 7.34,
1417
  "learning_rate": 6.393700787401575e-05,
1418
+ "loss": 0.0506,
1419
  "step": 235
1420
  },
1421
  {
1422
  "epoch": 7.38,
1423
  "learning_rate": 6.377952755905512e-05,
1424
+ "loss": 0.0552,
1425
  "step": 236
1426
  },
1427
  {
1428
  "epoch": 7.41,
1429
  "learning_rate": 6.36220472440945e-05,
1430
+ "loss": 0.058,
1431
  "step": 237
1432
  },
1433
  {
1434
  "epoch": 7.44,
1435
  "learning_rate": 6.346456692913386e-05,
1436
+ "loss": 0.0592,
1437
  "step": 238
1438
  },
1439
  {
1440
  "epoch": 7.47,
1441
  "learning_rate": 6.330708661417323e-05,
1442
+ "loss": 0.0803,
1443
  "step": 239
1444
  },
1445
  {
1446
  "epoch": 7.5,
1447
  "learning_rate": 6.31496062992126e-05,
1448
+ "loss": 0.0531,
1449
  "step": 240
1450
  },
1451
  {
1452
  "epoch": 7.53,
1453
  "learning_rate": 6.299212598425197e-05,
1454
+ "loss": 0.0693,
1455
  "step": 241
1456
  },
1457
  {
1458
  "epoch": 7.56,
1459
  "learning_rate": 6.283464566929134e-05,
1460
+ "loss": 0.091,
1461
  "step": 242
1462
  },
1463
  {
1464
  "epoch": 7.59,
1465
  "learning_rate": 6.267716535433072e-05,
1466
+ "loss": 0.0845,
1467
  "step": 243
1468
  },
1469
  {
1470
  "epoch": 7.62,
1471
  "learning_rate": 6.251968503937008e-05,
1472
+ "loss": 0.0642,
1473
  "step": 244
1474
  },
1475
  {
1476
  "epoch": 7.66,
1477
  "learning_rate": 6.236220472440946e-05,
1478
+ "loss": 0.0887,
1479
  "step": 245
1480
  },
1481
  {
1482
  "epoch": 7.69,
1483
  "learning_rate": 6.220472440944882e-05,
1484
+ "loss": 0.0602,
1485
  "step": 246
1486
  },
1487
  {
1488
  "epoch": 7.72,
1489
  "learning_rate": 6.204724409448819e-05,
1490
+ "loss": 0.0803,
1491
  "step": 247
1492
  },
1493
  {
1494
  "epoch": 7.75,
1495
  "learning_rate": 6.188976377952757e-05,
1496
+ "loss": 0.091,
1497
  "step": 248
1498
  },
1499
  {
1500
  "epoch": 7.78,
1501
  "learning_rate": 6.173228346456693e-05,
1502
+ "loss": 0.0935,
1503
  "step": 249
1504
  },
1505
  {
1506
  "epoch": 7.81,
1507
  "learning_rate": 6.15748031496063e-05,
1508
+ "loss": 0.1126,
1509
  "step": 250
1510
  },
1511
  {
1512
  "epoch": 7.84,
1513
  "learning_rate": 6.141732283464568e-05,
1514
+ "loss": 0.0717,
1515
  "step": 251
1516
  },
1517
  {
1518
  "epoch": 7.88,
1519
  "learning_rate": 6.125984251968504e-05,
1520
+ "loss": 0.0888,
1521
  "step": 252
1522
  },
1523
  {
1524
  "epoch": 7.91,
1525
  "learning_rate": 6.110236220472442e-05,
1526
+ "loss": 0.0775,
1527
  "step": 253
1528
  },
1529
  {
1530
  "epoch": 7.94,
1531
  "learning_rate": 6.094488188976378e-05,
1532
+ "loss": 0.0713,
1533
  "step": 254
1534
  },
1535
  {
1536
  "epoch": 7.97,
1537
  "learning_rate": 6.078740157480315e-05,
1538
+ "loss": 0.0697,
1539
  "step": 255
1540
  },
1541
  {
1542
  "epoch": 8.0,
1543
  "learning_rate": 6.0629921259842526e-05,
1544
+ "loss": 0.0904,
1545
  "step": 256
1546
  },
1547
  {
1548
  "epoch": 8.03,
1549
  "learning_rate": 6.047244094488189e-05,
1550
+ "loss": 0.0527,
1551
  "step": 257
1552
  },
1553
  {
1554
  "epoch": 8.06,
1555
  "learning_rate": 6.031496062992126e-05,
1556
+ "loss": 0.0398,
1557
  "step": 258
1558
  },
1559
  {
1560
  "epoch": 8.09,
1561
  "learning_rate": 6.0157480314960636e-05,
1562
+ "loss": 0.0475,
1563
  "step": 259
1564
  },
1565
  {
1566
  "epoch": 8.12,
1567
  "learning_rate": 6e-05,
1568
+ "loss": 0.0493,
1569
  "step": 260
1570
  },
1571
  {
1572
  "epoch": 8.16,
1573
  "learning_rate": 5.984251968503938e-05,
1574
+ "loss": 0.057,
1575
  "step": 261
1576
  },
1577
  {
1578
  "epoch": 8.19,
1579
  "learning_rate": 5.9685039370078746e-05,
1580
+ "loss": 0.0535,
1581
  "step": 262
1582
  },
1583
  {
1584
  "epoch": 8.22,
1585
  "learning_rate": 5.952755905511811e-05,
1586
+ "loss": 0.0554,
1587
  "step": 263
1588
  },
1589
  {
1590
  "epoch": 8.25,
1591
  "learning_rate": 5.9370078740157483e-05,
1592
+ "loss": 0.0593,
1593
  "step": 264
1594
  },
1595
  {
1596
  "epoch": 8.28,
1597
  "learning_rate": 5.9212598425196856e-05,
1598
+ "loss": 0.0591,
1599
  "step": 265
1600
  },
1601
  {
1602
  "epoch": 8.31,
1603
  "learning_rate": 5.905511811023622e-05,
1604
+ "loss": 0.0589,
1605
  "step": 266
1606
  },
1607
  {
1608
  "epoch": 8.34,
1609
  "learning_rate": 5.889763779527559e-05,
1610
+ "loss": 0.0449,
1611
  "step": 267
1612
  },
1613
  {
1614
  "epoch": 8.38,
1615
  "learning_rate": 5.874015748031496e-05,
1616
+ "loss": 0.0541,
1617
  "step": 268
1618
  },
1619
  {
1620
  "epoch": 8.41,
1621
  "learning_rate": 5.858267716535434e-05,
1622
+ "loss": 0.0507,
1623
  "step": 269
1624
  },
1625
  {
1626
  "epoch": 8.44,
1627
  "learning_rate": 5.84251968503937e-05,
1628
+ "loss": 0.0411,
1629
  "step": 270
1630
  },
1631
  {
1632
  "epoch": 8.47,
1633
  "learning_rate": 5.826771653543307e-05,
1634
+ "loss": 0.0617,
1635
  "step": 271
1636
  },
1637
  {
1638
  "epoch": 8.5,
1639
  "learning_rate": 5.811023622047245e-05,
1640
+ "loss": 0.0565,
1641
  "step": 272
1642
  },
1643
  {
1644
  "epoch": 8.53,
1645
  "learning_rate": 5.795275590551181e-05,
1646
+ "loss": 0.083,
1647
  "step": 273
1648
  },
1649
  {
1650
  "epoch": 8.56,
1651
  "learning_rate": 5.779527559055118e-05,
1652
+ "loss": 0.0588,
1653
  "step": 274
1654
  },
1655
  {
1656
  "epoch": 8.59,
1657
  "learning_rate": 5.763779527559056e-05,
1658
+ "loss": 0.0686,
1659
  "step": 275
1660
  },
1661
  {
1662
  "epoch": 8.62,
1663
  "learning_rate": 5.748031496062992e-05,
1664
+ "loss": 0.0712,
1665
  "step": 276
1666
  },
1667
  {
1668
  "epoch": 8.66,
1669
  "learning_rate": 5.73228346456693e-05,
1670
+ "loss": 0.0708,
1671
  "step": 277
1672
  },
1673
  {
1674
  "epoch": 8.69,
1675
  "learning_rate": 5.716535433070867e-05,
1676
+ "loss": 0.0573,
1677
  "step": 278
1678
  },
1679
  {
1680
  "epoch": 8.72,
1681
  "learning_rate": 5.700787401574803e-05,
1682
+ "loss": 0.0563,
1683
  "step": 279
1684
  },
1685
  {
1686
  "epoch": 8.75,
1687
  "learning_rate": 5.6850393700787404e-05,
1688
+ "loss": 0.0567,
1689
  "step": 280
1690
  },
1691
  {
1692
  "epoch": 8.78,
1693
  "learning_rate": 5.6692913385826777e-05,
1694
+ "loss": 0.0517,
1695
  "step": 281
1696
  },
1697
  {
1698
  "epoch": 8.81,
1699
  "learning_rate": 5.653543307086614e-05,
1700
+ "loss": 0.0571,
1701
  "step": 282
1702
  },
1703
  {
1704
  "epoch": 8.84,
1705
  "learning_rate": 5.6377952755905514e-05,
1706
+ "loss": 0.0605,
1707
  "step": 283
1708
  },
1709
  {
1710
  "epoch": 8.88,
1711
  "learning_rate": 5.622047244094488e-05,
1712
+ "loss": 0.0469,
1713
  "step": 284
1714
  },
1715
  {
1716
  "epoch": 8.91,
1717
  "learning_rate": 5.606299212598426e-05,
1718
+ "loss": 0.0653,
1719
  "step": 285
1720
  },
1721
  {
1722
  "epoch": 8.94,
1723
  "learning_rate": 5.5905511811023624e-05,
1724
+ "loss": 0.0578,
1725
  "step": 286
1726
  },
1727
  {
1728
  "epoch": 8.97,
1729
  "learning_rate": 5.574803149606299e-05,
1730
+ "loss": 0.0547,
1731
  "step": 287
1732
  },
1733
  {
1734
  "epoch": 9.0,
1735
  "learning_rate": 5.559055118110237e-05,
1736
+ "loss": 0.0602,
1737
  "step": 288
1738
  },
1739
  {
1740
  "epoch": 9.03,
1741
  "learning_rate": 5.5433070866141734e-05,
1742
+ "loss": 0.0668,
1743
  "step": 289
1744
  },
1745
  {
1746
  "epoch": 9.06,
1747
  "learning_rate": 5.52755905511811e-05,
1748
+ "loss": 0.0458,
1749
  "step": 290
1750
  },
1751
  {
1752
  "epoch": 9.09,
1753
  "learning_rate": 5.511811023622048e-05,
1754
+ "loss": 0.0438,
1755
  "step": 291
1756
  },
1757
  {
1758
  "epoch": 9.12,
1759
  "learning_rate": 5.496062992125984e-05,
1760
+ "loss": 0.0657,
1761
  "step": 292
1762
  },
1763
  {
1764
  "epoch": 9.16,
1765
  "learning_rate": 5.480314960629922e-05,
1766
+ "loss": 0.052,
1767
  "step": 293
1768
  },
1769
  {
1770
  "epoch": 9.19,
1771
  "learning_rate": 5.464566929133859e-05,
1772
+ "loss": 0.0569,
1773
  "step": 294
1774
  },
1775
  {
1776
  "epoch": 9.22,
1777
  "learning_rate": 5.448818897637795e-05,
1778
+ "loss": 0.0357,
1779
  "step": 295
1780
  },
1781
  {
1782
  "epoch": 9.25,
1783
  "learning_rate": 5.433070866141733e-05,
1784
+ "loss": 0.0358,
1785
  "step": 296
1786
  },
1787
  {
1788
  "epoch": 9.28,
1789
  "learning_rate": 5.41732283464567e-05,
1790
+ "loss": 0.0414,
1791
  "step": 297
1792
  },
1793
  {
1794
  "epoch": 9.31,
1795
  "learning_rate": 5.401574803149606e-05,
1796
+ "loss": 0.0384,
1797
  "step": 298
1798
  },
1799
  {
1800
  "epoch": 9.34,
1801
  "learning_rate": 5.3858267716535435e-05,
1802
+ "loss": 0.0662,
1803
  "step": 299
1804
  },
1805
  {
1806
  "epoch": 9.38,
1807
  "learning_rate": 5.37007874015748e-05,
1808
+ "loss": 0.0354,
1809
  "step": 300
1810
  },
1811
  {
1812
  "epoch": 9.41,
1813
  "learning_rate": 5.354330708661418e-05,
1814
+ "loss": 0.0441,
1815
  "step": 301
1816
  },
1817
  {
1818
  "epoch": 9.44,
1819
  "learning_rate": 5.3385826771653545e-05,
1820
+ "loss": 0.0448,
1821
  "step": 302
1822
  },
1823
  {
1824
  "epoch": 9.47,
1825
  "learning_rate": 5.322834645669291e-05,
1826
+ "loss": 0.0478,
1827
  "step": 303
1828
  },
1829
  {
1830
  "epoch": 9.5,
1831
  "learning_rate": 5.307086614173229e-05,
1832
+ "loss": 0.0414,
1833
  "step": 304
1834
  },
1835
  {
1836
  "epoch": 9.53,
1837
  "learning_rate": 5.2913385826771654e-05,
1838
+ "loss": 0.0401,
1839
  "step": 305
1840
  },
1841
  {
1842
  "epoch": 9.56,
1843
  "learning_rate": 5.275590551181102e-05,
1844
+ "loss": 0.0404,
1845
  "step": 306
1846
  },
1847
  {
1848
  "epoch": 9.59,
1849
  "learning_rate": 5.25984251968504e-05,
1850
+ "loss": 0.0544,
1851
  "step": 307
1852
  },
1853
  {
1854
  "epoch": 9.62,
1855
  "learning_rate": 5.2440944881889764e-05,
1856
+ "loss": 0.037,
1857
  "step": 308
1858
  },
1859
  {
1860
  "epoch": 9.66,
1861
  "learning_rate": 5.228346456692914e-05,
1862
+ "loss": 0.0776,
1863
  "step": 309
1864
  },
1865
  {
1866
  "epoch": 9.69,
1867
  "learning_rate": 5.212598425196851e-05,
1868
+ "loss": 0.0722,
1869
  "step": 310
1870
  },
1871
  {
1872
  "epoch": 9.72,
1873
  "learning_rate": 5.1968503937007874e-05,
1874
+ "loss": 0.043,
1875
  "step": 311
1876
  },
1877
  {
1878
  "epoch": 9.75,
1879
  "learning_rate": 5.181102362204725e-05,
1880
+ "loss": 0.0515,
1881
  "step": 312
1882
  },
1883
  {
1884
  "epoch": 9.78,
1885
  "learning_rate": 5.165354330708662e-05,
1886
+ "loss": 0.0584,
1887
  "step": 313
1888
  },
1889
  {
1890
  "epoch": 9.81,
1891
  "learning_rate": 5.1496062992125984e-05,
1892
+ "loss": 0.0542,
1893
  "step": 314
1894
  },
1895
  {
1896
  "epoch": 9.84,
1897
  "learning_rate": 5.1338582677165356e-05,
1898
+ "loss": 0.042,
1899
  "step": 315
1900
  },
1901
  {
1902
  "epoch": 9.88,
1903
  "learning_rate": 5.118110236220473e-05,
1904
+ "loss": 0.0497,
1905
  "step": 316
1906
  },
1907
  {
1908
  "epoch": 9.91,
1909
  "learning_rate": 5.10236220472441e-05,
1910
+ "loss": 0.0558,
1911
  "step": 317
1912
  },
1913
  {
1914
  "epoch": 9.94,
1915
  "learning_rate": 5.0866141732283466e-05,
1916
+ "loss": 0.0696,
1917
  "step": 318
1918
  },
1919
  {
1920
  "epoch": 9.97,
1921
  "learning_rate": 5.070866141732283e-05,
1922
+ "loss": 0.0555,
1923
  "step": 319
1924
  },
1925
  {
1926
  "epoch": 10.0,
1927
  "learning_rate": 5.055118110236221e-05,
1928
+ "loss": 0.0611,
1929
  "step": 320
1930
  },
1931
  {
1932
  "epoch": 10.03,
1933
  "learning_rate": 5.0393700787401575e-05,
1934
+ "loss": 0.0418,
1935
  "step": 321
1936
  },
1937
  {
1938
  "epoch": 10.06,
1939
  "learning_rate": 5.023622047244094e-05,
1940
+ "loss": 0.049,
1941
  "step": 322
1942
  },
1943
  {
1944
  "epoch": 10.09,
1945
  "learning_rate": 5.007874015748032e-05,
1946
+ "loss": 0.0444,
1947
  "step": 323
1948
  },
1949
  {
1950
  "epoch": 10.12,
1951
  "learning_rate": 4.9921259842519685e-05,
1952
+ "loss": 0.0426,
1953
  "step": 324
1954
  },
1955
  {
1956
  "epoch": 10.16,
1957
  "learning_rate": 4.976377952755906e-05,
1958
+ "loss": 0.0338,
1959
  "step": 325
1960
  },
1961
  {
1962
  "epoch": 10.19,
1963
  "learning_rate": 4.960629921259843e-05,
1964
+ "loss": 0.0354,
1965
  "step": 326
1966
  },
1967
  {
1968
  "epoch": 10.22,
1969
  "learning_rate": 4.94488188976378e-05,
1970
+ "loss": 0.0344,
1971
  "step": 327
1972
  },
1973
  {
1974
  "epoch": 10.25,
1975
  "learning_rate": 4.929133858267717e-05,
1976
+ "loss": 0.0312,
1977
  "step": 328
1978
  },
1979
  {
1980
  "epoch": 10.28,
1981
  "learning_rate": 4.913385826771654e-05,
1982
+ "loss": 0.0514,
1983
  "step": 329
1984
  },
1985
  {
1986
  "epoch": 10.31,
1987
  "learning_rate": 4.897637795275591e-05,
1988
+ "loss": 0.0377,
1989
  "step": 330
1990
  },
1991
  {
1992
  "epoch": 10.34,
1993
  "learning_rate": 4.881889763779528e-05,
1994
+ "loss": 0.0398,
1995
  "step": 331
1996
  },
1997
  {
1998
  "epoch": 10.38,
1999
  "learning_rate": 4.866141732283465e-05,
2000
+ "loss": 0.0618,
2001
  "step": 332
2002
  },
2003
  {
2004
  "epoch": 10.41,
2005
  "learning_rate": 4.8503937007874014e-05,
2006
+ "loss": 0.0415,
2007
  "step": 333
2008
  },
2009
  {
2010
  "epoch": 10.44,
2011
  "learning_rate": 4.8346456692913387e-05,
2012
+ "loss": 0.0518,
2013
  "step": 334
2014
  },
2015
  {
2016
  "epoch": 10.47,
2017
  "learning_rate": 4.818897637795276e-05,
2018
+ "loss": 0.0606,
2019
  "step": 335
2020
  },
2021
  {
2022
  "epoch": 10.5,
2023
  "learning_rate": 4.8031496062992124e-05,
2024
+ "loss": 0.0317,
2025
  "step": 336
2026
  },
2027
  {
2028
  "epoch": 10.53,
2029
  "learning_rate": 4.7874015748031496e-05,
2030
+ "loss": 0.0508,
2031
  "step": 337
2032
  },
2033
  {
2034
  "epoch": 10.56,
2035
  "learning_rate": 4.771653543307087e-05,
2036
+ "loss": 0.0311,
2037
  "step": 338
2038
  },
2039
  {
2040
  "epoch": 10.59,
2041
  "learning_rate": 4.755905511811024e-05,
2042
+ "loss": 0.0422,
2043
  "step": 339
2044
  },
2045
  {
2046
  "epoch": 10.62,
2047
  "learning_rate": 4.7401574803149606e-05,
2048
+ "loss": 0.0364,
2049
  "step": 340
2050
  },
2051
  {
2052
  "epoch": 10.66,
2053
  "learning_rate": 4.724409448818898e-05,
2054
+ "loss": 0.0521,
2055
  "step": 341
2056
  },
2057
  {
2058
  "epoch": 10.69,
2059
  "learning_rate": 4.708661417322835e-05,
2060
+ "loss": 0.0359,
2061
  "step": 342
2062
  },
2063
  {
2064
  "epoch": 10.72,
2065
  "learning_rate": 4.692913385826772e-05,
2066
+ "loss": 0.0495,
2067
  "step": 343
2068
  },
2069
  {
2070
  "epoch": 10.75,
2071
  "learning_rate": 4.677165354330709e-05,
2072
+ "loss": 0.0397,
2073
  "step": 344
2074
  },
2075
  {
2076
  "epoch": 10.78,
2077
  "learning_rate": 4.661417322834646e-05,
2078
+ "loss": 0.0475,
2079
  "step": 345
2080
  },
2081
  {
2082
  "epoch": 10.81,
2083
  "learning_rate": 4.645669291338583e-05,
2084
+ "loss": 0.0406,
2085
  "step": 346
2086
  },
2087
  {
2088
  "epoch": 10.84,
2089
  "learning_rate": 4.6299212598425204e-05,
2090
+ "loss": 0.0624,
2091
  "step": 347
2092
  },
2093
  {
2094
  "epoch": 10.88,
2095
  "learning_rate": 4.614173228346457e-05,
2096
+ "loss": 0.0423,
2097
  "step": 348
2098
  },
2099
  {
2100
  "epoch": 10.91,
2101
  "learning_rate": 4.5984251968503935e-05,
2102
+ "loss": 0.053,
2103
  "step": 349
2104
  },
2105
  {
2106
  "epoch": 10.94,
2107
  "learning_rate": 4.582677165354331e-05,
2108
+ "loss": 0.0732,
2109
  "step": 350
2110
  },
2111
  {
2112
  "epoch": 10.97,
2113
  "learning_rate": 4.566929133858268e-05,
2114
+ "loss": 0.0517,
2115
  "step": 351
2116
  },
2117
  {
2118
  "epoch": 11.0,
2119
  "learning_rate": 4.5511811023622045e-05,
2120
+ "loss": 0.0678,
2121
  "step": 352
2122
  },
2123
  {
2124
  "epoch": 11.03,
2125
  "learning_rate": 4.535433070866142e-05,
2126
+ "loss": 0.0337,
2127
  "step": 353
2128
  },
2129
  {
2130
  "epoch": 11.06,
2131
  "learning_rate": 4.519685039370079e-05,
2132
+ "loss": 0.0304,
2133
  "step": 354
2134
  },
2135
  {
2136
  "epoch": 11.09,
2137
  "learning_rate": 4.503937007874016e-05,
2138
+ "loss": 0.0385,
2139
  "step": 355
2140
  },
2141
  {
2142
  "epoch": 11.12,
2143
  "learning_rate": 4.488188976377953e-05,
2144
+ "loss": 0.0327,
2145
  "step": 356
2146
  },
2147
  {
2148
  "epoch": 11.16,
2149
  "learning_rate": 4.47244094488189e-05,
2150
+ "loss": 0.0481,
2151
  "step": 357
2152
  },
2153
  {
2154
  "epoch": 11.19,
2155
  "learning_rate": 4.456692913385827e-05,
2156
+ "loss": 0.0273,
2157
  "step": 358
2158
  },
2159
  {
2160
  "epoch": 11.22,
2161
  "learning_rate": 4.4409448818897643e-05,
2162
+ "loss": 0.0422,
2163
  "step": 359
2164
  },
2165
  {
2166
  "epoch": 11.25,
2167
  "learning_rate": 4.425196850393701e-05,
2168
+ "loss": 0.0368,
2169
  "step": 360
2170
  },
2171
  {
2172
  "epoch": 11.28,
2173
  "learning_rate": 4.409448818897638e-05,
2174
+ "loss": 0.0349,
2175
  "step": 361
2176
  },
2177
  {
2178
  "epoch": 11.31,
2179
  "learning_rate": 4.393700787401575e-05,
2180
+ "loss": 0.0316,
2181
  "step": 362
2182
  },
2183
  {
2184
  "epoch": 11.34,
2185
  "learning_rate": 4.3779527559055125e-05,
2186
+ "loss": 0.0305,
2187
  "step": 363
2188
  },
2189
  {
2190
  "epoch": 11.38,
2191
  "learning_rate": 4.362204724409449e-05,
2192
+ "loss": 0.0434,
2193
  "step": 364
2194
  },
2195
  {
2196
  "epoch": 11.41,
2197
  "learning_rate": 4.346456692913386e-05,
2198
+ "loss": 0.041,
2199
  "step": 365
2200
  },
2201
  {
2202
  "epoch": 11.44,
2203
  "learning_rate": 4.330708661417323e-05,
2204
+ "loss": 0.0446,
2205
  "step": 366
2206
  },
2207
  {
2208
  "epoch": 11.47,
2209
  "learning_rate": 4.31496062992126e-05,
2210
+ "loss": 0.0382,
2211
  "step": 367
2212
  },
2213
  {
2214
  "epoch": 11.5,
2215
  "learning_rate": 4.2992125984251966e-05,
2216
+ "loss": 0.0379,
2217
  "step": 368
2218
  },
2219
  {
2220
  "epoch": 11.53,
2221
  "learning_rate": 4.283464566929134e-05,
2222
+ "loss": 0.0363,
2223
  "step": 369
2224
  },
2225
  {
2226
  "epoch": 11.56,
2227
  "learning_rate": 4.267716535433071e-05,
2228
+ "loss": 0.0386,
2229
  "step": 370
2230
  },
2231
  {
2232
  "epoch": 11.59,
2233
  "learning_rate": 4.251968503937008e-05,
2234
+ "loss": 0.0251,
2235
  "step": 371
2236
  },
2237
  {
2238
  "epoch": 11.62,
2239
  "learning_rate": 4.236220472440945e-05,
2240
+ "loss": 0.0336,
2241
  "step": 372
2242
  },
2243
  {
2244
  "epoch": 11.66,
2245
  "learning_rate": 4.220472440944882e-05,
2246
+ "loss": 0.0438,
2247
  "step": 373
2248
  },
2249
  {
2250
  "epoch": 11.69,
2251
  "learning_rate": 4.204724409448819e-05,
2252
+ "loss": 0.0442,
2253
  "step": 374
2254
  },
2255
  {
2256
  "epoch": 11.72,
2257
  "learning_rate": 4.1889763779527564e-05,
2258
+ "loss": 0.0285,
2259
  "step": 375
2260
  },
2261
  {
2262
  "epoch": 11.75,
2263
  "learning_rate": 4.173228346456693e-05,
2264
+ "loss": 0.0388,
2265
  "step": 376
2266
  },
2267
  {
2268
  "epoch": 11.78,
2269
  "learning_rate": 4.15748031496063e-05,
2270
+ "loss": 0.0442,
2271
  "step": 377
2272
  },
2273
  {
2274
  "epoch": 11.81,
2275
  "learning_rate": 4.1417322834645674e-05,
2276
+ "loss": 0.0371,
2277
  "step": 378
2278
  },
2279
  {
2280
  "epoch": 11.84,
2281
  "learning_rate": 4.1259842519685046e-05,
2282
+ "loss": 0.0488,
2283
  "step": 379
2284
  },
2285
  {
2286
  "epoch": 11.88,
2287
  "learning_rate": 4.110236220472441e-05,
2288
+ "loss": 0.0508,
2289
  "step": 380
2290
  },
2291
  {
2292
  "epoch": 11.91,
2293
  "learning_rate": 4.0944881889763784e-05,
2294
+ "loss": 0.0455,
2295
  "step": 381
2296
  },
2297
  {
2298
  "epoch": 11.94,
2299
  "learning_rate": 4.078740157480315e-05,
2300
+ "loss": 0.0432,
2301
  "step": 382
2302
  },
2303
  {
2304
  "epoch": 11.97,
2305
  "learning_rate": 4.062992125984252e-05,
2306
+ "loss": 0.033,
2307
  "step": 383
2308
  },
2309
  {
2310
  "epoch": 12.0,
2311
  "learning_rate": 4.047244094488189e-05,
2312
+ "loss": 0.0446,
2313
  "step": 384
2314
  },
2315
  {
2316
  "epoch": 12.03,
2317
  "learning_rate": 4.031496062992126e-05,
2318
+ "loss": 0.0355,
2319
  "step": 385
2320
  },
2321
  {
2322
  "epoch": 12.06,
2323
  "learning_rate": 4.015748031496063e-05,
2324
+ "loss": 0.0468,
2325
  "step": 386
2326
  },
2327
  {
2328
  "epoch": 12.09,
2329
  "learning_rate": 4e-05,
2330
+ "loss": 0.0273,
2331
  "step": 387
2332
  },
2333
  {
2334
  "epoch": 12.12,
2335
  "learning_rate": 3.984251968503937e-05,
2336
+ "loss": 0.0344,
2337
  "step": 388
2338
  },
2339
  {
2340
  "epoch": 12.16,
2341
  "learning_rate": 3.968503937007874e-05,
2342
+ "loss": 0.0331,
2343
  "step": 389
2344
  },
2345
  {
2346
  "epoch": 12.19,
2347
  "learning_rate": 3.952755905511811e-05,
2348
+ "loss": 0.0369,
2349
  "step": 390
2350
  },
2351
  {
2352
  "epoch": 12.22,
2353
  "learning_rate": 3.9370078740157485e-05,
2354
+ "loss": 0.0324,
2355
  "step": 391
2356
  },
2357
  {
2358
  "epoch": 12.25,
2359
  "learning_rate": 3.921259842519685e-05,
2360
+ "loss": 0.0294,
2361
  "step": 392
2362
  },
2363
  {
2364
  "epoch": 12.28,
2365
  "learning_rate": 3.905511811023622e-05,
2366
+ "loss": 0.0441,
2367
  "step": 393
2368
  },
2369
  {
2370
  "epoch": 12.31,
2371
  "learning_rate": 3.8897637795275595e-05,
2372
+ "loss": 0.0326,
2373
  "step": 394
2374
  },
2375
  {
2376
  "epoch": 12.34,
2377
  "learning_rate": 3.874015748031497e-05,
2378
+ "loss": 0.0288,
2379
  "step": 395
2380
  },
2381
  {
2382
  "epoch": 12.38,
2383
  "learning_rate": 3.858267716535433e-05,
2384
+ "loss": 0.0305,
2385
  "step": 396
2386
  },
2387
  {
2388
  "epoch": 12.41,
2389
  "learning_rate": 3.8425196850393705e-05,
2390
+ "loss": 0.0286,
2391
  "step": 397
2392
  },
2393
  {
2394
  "epoch": 12.44,
2395
  "learning_rate": 3.826771653543308e-05,
2396
+ "loss": 0.0327,
2397
  "step": 398
2398
  },
2399
  {
2400
  "epoch": 12.47,
2401
  "learning_rate": 3.811023622047244e-05,
2402
+ "loss": 0.0814,
2403
  "step": 399
2404
  },
2405
  {
2406
  "epoch": 12.5,
2407
  "learning_rate": 3.795275590551181e-05,
2408
+ "loss": 0.0269,
2409
  "step": 400
2410
  },
2411
  {
2412
  "epoch": 12.53,
2413
  "learning_rate": 3.779527559055118e-05,
2414
+ "loss": 0.0352,
2415
  "step": 401
2416
  },
2417
  {
2418
  "epoch": 12.56,
2419
  "learning_rate": 3.763779527559055e-05,
2420
+ "loss": 0.0331,
2421
  "step": 402
2422
  },
2423
  {
2424
  "epoch": 12.59,
2425
  "learning_rate": 3.7480314960629924e-05,
2426
+ "loss": 0.0327,
2427
  "step": 403
2428
  },
2429
  {
2430
  "epoch": 12.62,
2431
  "learning_rate": 3.732283464566929e-05,
2432
+ "loss": 0.0544,
2433
  "step": 404
2434
  },
2435
  {
2436
  "epoch": 12.66,
2437
  "learning_rate": 3.716535433070866e-05,
2438
+ "loss": 0.0395,
2439
  "step": 405
2440
  },
2441
  {
2442
  "epoch": 12.69,
2443
  "learning_rate": 3.7007874015748034e-05,
2444
+ "loss": 0.0323,
2445
  "step": 406
2446
  },
2447
  {
2448
  "epoch": 12.72,
2449
  "learning_rate": 3.6850393700787406e-05,
2450
+ "loss": 0.037,
2451
  "step": 407
2452
  },
2453
  {
2454
  "epoch": 12.75,
2455
  "learning_rate": 3.669291338582677e-05,
2456
+ "loss": 0.0344,
2457
  "step": 408
2458
  },
2459
  {
2460
  "epoch": 12.78,
2461
  "learning_rate": 3.6535433070866144e-05,
2462
+ "loss": 0.0358,
2463
  "step": 409
2464
  },
2465
  {
2466
  "epoch": 12.81,
2467
  "learning_rate": 3.6377952755905516e-05,
2468
+ "loss": 0.0311,
2469
  "step": 410
2470
  },
2471
  {
2472
  "epoch": 12.84,
2473
  "learning_rate": 3.622047244094489e-05,
2474
+ "loss": 0.0467,
2475
  "step": 411
2476
  },
2477
  {
2478
  "epoch": 12.88,
2479
  "learning_rate": 3.6062992125984253e-05,
2480
+ "loss": 0.0316,
2481
  "step": 412
2482
  },
2483
  {
2484
  "epoch": 12.91,
2485
  "learning_rate": 3.5905511811023626e-05,
2486
+ "loss": 0.0433,
2487
  "step": 413
2488
  },
2489
  {
2490
  "epoch": 12.94,
2491
  "learning_rate": 3.5748031496063e-05,
2492
+ "loss": 0.0489,
2493
  "step": 414
2494
  },
2495
  {
2496
  "epoch": 12.97,
2497
  "learning_rate": 3.559055118110236e-05,
2498
+ "loss": 0.0383,
2499
  "step": 415
2500
  },
2501
  {
2502
  "epoch": 13.0,
2503
  "learning_rate": 3.5433070866141735e-05,
2504
+ "loss": 0.0399,
2505
  "step": 416
2506
  },
2507
  {
2508
  "epoch": 13.03,
2509
  "learning_rate": 3.52755905511811e-05,
2510
+ "loss": 0.0297,
2511
  "step": 417
2512
  },
2513
  {
2514
  "epoch": 13.06,
2515
  "learning_rate": 3.511811023622047e-05,
2516
+ "loss": 0.0382,
2517
  "step": 418
2518
  },
2519
  {
2520
  "epoch": 13.09,
2521
  "learning_rate": 3.4960629921259845e-05,
2522
+ "loss": 0.0259,
2523
  "step": 419
2524
  },
2525
  {
2526
  "epoch": 13.12,
2527
  "learning_rate": 3.480314960629921e-05,
2528
+ "loss": 0.0287,
2529
  "step": 420
2530
  },
2531
  {
2532
  "epoch": 13.16,
2533
  "learning_rate": 3.464566929133858e-05,
2534
+ "loss": 0.0335,
2535
  "step": 421
2536
  },
2537
  {
2538
  "epoch": 13.19,
2539
  "learning_rate": 3.4488188976377955e-05,
2540
+ "loss": 0.0313,
2541
  "step": 422
2542
  },
2543
  {
2544
  "epoch": 13.22,
2545
  "learning_rate": 3.433070866141733e-05,
2546
+ "loss": 0.0278,
2547
  "step": 423
2548
  },
2549
  {
2550
  "epoch": 13.25,
2551
  "learning_rate": 3.417322834645669e-05,
2552
+ "loss": 0.0327,
2553
  "step": 424
2554
  },
2555
  {
2556
  "epoch": 13.28,
2557
  "learning_rate": 3.4015748031496065e-05,
2558
+ "loss": 0.0327,
2559
  "step": 425
2560
  },
2561
  {
2562
  "epoch": 13.31,
2563
  "learning_rate": 3.385826771653544e-05,
2564
+ "loss": 0.0246,
2565
  "step": 426
2566
  },
2567
  {
2568
  "epoch": 13.34,
2569
  "learning_rate": 3.370078740157481e-05,
2570
+ "loss": 0.0327,
2571
  "step": 427
2572
  },
2573
  {
2574
  "epoch": 13.38,
2575
  "learning_rate": 3.3543307086614174e-05,
2576
+ "loss": 0.0261,
2577
  "step": 428
2578
  },
2579
  {
2580
  "epoch": 13.41,
2581
  "learning_rate": 3.3385826771653546e-05,
2582
+ "loss": 0.0359,
2583
  "step": 429
2584
  },
2585
  {
2586
  "epoch": 13.44,
2587
  "learning_rate": 3.322834645669292e-05,
2588
+ "loss": 0.0321,
2589
  "step": 430
2590
  },
2591
  {
2592
  "epoch": 13.47,
2593
  "learning_rate": 3.3070866141732284e-05,
2594
+ "loss": 0.0329,
2595
  "step": 431
2596
  },
2597
  {
2598
  "epoch": 13.5,
2599
  "learning_rate": 3.2913385826771656e-05,
2600
+ "loss": 0.0303,
2601
  "step": 432
2602
  },
2603
  {
2604
  "epoch": 13.53,
2605
  "learning_rate": 3.275590551181102e-05,
2606
+ "loss": 0.0372,
2607
  "step": 433
2608
  },
2609
  {
2610
  "epoch": 13.56,
2611
  "learning_rate": 3.2598425196850394e-05,
2612
+ "loss": 0.0255,
2613
  "step": 434
2614
  },
2615
  {
2616
  "epoch": 13.59,
2617
  "learning_rate": 3.2440944881889766e-05,
2618
+ "loss": 0.0357,
2619
  "step": 435
2620
  },
2621
  {
2622
  "epoch": 13.62,
2623
  "learning_rate": 3.228346456692913e-05,
2624
+ "loss": 0.0536,
2625
  "step": 436
2626
  },
2627
  {
2628
  "epoch": 13.66,
2629
  "learning_rate": 3.2125984251968504e-05,
2630
+ "loss": 0.0323,
2631
  "step": 437
2632
  },
2633
  {
2634
  "epoch": 13.69,
2635
  "learning_rate": 3.1968503937007876e-05,
2636
+ "loss": 0.0273,
2637
  "step": 438
2638
  },
2639
  {
2640
  "epoch": 13.72,
2641
  "learning_rate": 3.181102362204725e-05,
2642
+ "loss": 0.0366,
2643
  "step": 439
2644
  },
2645
  {
2646
  "epoch": 13.75,
2647
  "learning_rate": 3.165354330708661e-05,
2648
+ "loss": 0.0339,
2649
  "step": 440
2650
  },
2651
  {
2652
  "epoch": 13.78,
2653
  "learning_rate": 3.1496062992125985e-05,
2654
+ "loss": 0.0312,
2655
  "step": 441
2656
  },
2657
  {
2658
  "epoch": 13.81,
2659
  "learning_rate": 3.133858267716536e-05,
2660
+ "loss": 0.0355,
2661
  "step": 442
2662
  },
2663
  {
2664
  "epoch": 13.84,
2665
  "learning_rate": 3.118110236220473e-05,
2666
+ "loss": 0.029,
2667
  "step": 443
2668
  },
2669
  {
2670
  "epoch": 13.88,
2671
  "learning_rate": 3.1023622047244095e-05,
2672
+ "loss": 0.0336,
2673
  "step": 444
2674
  },
2675
  {
2676
  "epoch": 13.91,
2677
  "learning_rate": 3.086614173228347e-05,
2678
+ "loss": 0.0374,
2679
  "step": 445
2680
  },
2681
  {
2682
  "epoch": 13.94,
2683
  "learning_rate": 3.070866141732284e-05,
2684
+ "loss": 0.0276,
2685
  "step": 446
2686
  },
2687
  {
2688
  "epoch": 13.97,
2689
  "learning_rate": 3.055118110236221e-05,
2690
+ "loss": 0.0282,
2691
  "step": 447
2692
  },
2693
  {
2694
  "epoch": 14.0,
2695
  "learning_rate": 3.0393700787401574e-05,
2696
+ "loss": 0.0377,
2697
  "step": 448
2698
  },
2699
  {
2700
  "epoch": 14.03,
2701
  "learning_rate": 3.0236220472440946e-05,
2702
+ "loss": 0.0327,
2703
  "step": 449
2704
  },
2705
  {
2706
  "epoch": 14.06,
2707
  "learning_rate": 3.0078740157480318e-05,
2708
+ "loss": 0.0286,
2709
  "step": 450
2710
  },
2711
  {
2712
  "epoch": 14.09,
2713
  "learning_rate": 2.992125984251969e-05,
2714
+ "loss": 0.0307,
2715
  "step": 451
2716
  },
2717
  {
2718
  "epoch": 14.12,
2719
  "learning_rate": 2.9763779527559056e-05,
2720
+ "loss": 0.0247,
2721
  "step": 452
2722
  },
2723
  {
2724
  "epoch": 14.16,
2725
  "learning_rate": 2.9606299212598428e-05,
2726
+ "loss": 0.0259,
2727
  "step": 453
2728
  },
2729
  {
2730
  "epoch": 14.19,
2731
  "learning_rate": 2.9448818897637797e-05,
2732
+ "loss": 0.0227,
2733
  "step": 454
2734
  },
2735
  {
2736
  "epoch": 14.22,
2737
  "learning_rate": 2.929133858267717e-05,
2738
+ "loss": 0.0264,
2739
  "step": 455
2740
  },
2741
  {
2742
  "epoch": 14.25,
2743
  "learning_rate": 2.9133858267716534e-05,
2744
+ "loss": 0.0292,
2745
  "step": 456
2746
  },
2747
  {
2748
  "epoch": 14.28,
2749
  "learning_rate": 2.8976377952755906e-05,
2750
+ "loss": 0.025,
2751
  "step": 457
2752
  },
2753
  {
2754
  "epoch": 14.31,
2755
  "learning_rate": 2.881889763779528e-05,
2756
+ "loss": 0.0321,
2757
  "step": 458
2758
  },
2759
  {
2760
  "epoch": 14.34,
2761
  "learning_rate": 2.866141732283465e-05,
2762
+ "loss": 0.0296,
2763
  "step": 459
2764
  },
2765
  {
2766
  "epoch": 14.38,
2767
  "learning_rate": 2.8503937007874016e-05,
2768
+ "loss": 0.0326,
2769
  "step": 460
2770
  },
2771
  {
2772
  "epoch": 14.41,
2773
  "learning_rate": 2.8346456692913388e-05,
2774
+ "loss": 0.0284,
2775
  "step": 461
2776
  },
2777
  {
2778
  "epoch": 14.44,
2779
  "learning_rate": 2.8188976377952757e-05,
2780
+ "loss": 0.0307,
2781
  "step": 462
2782
  },
2783
  {
2784
  "epoch": 14.47,
2785
  "learning_rate": 2.803149606299213e-05,
2786
+ "loss": 0.0285,
2787
  "step": 463
2788
  },
2789
  {
2790
  "epoch": 14.5,
2791
  "learning_rate": 2.7874015748031495e-05,
2792
+ "loss": 0.0252,
2793
  "step": 464
2794
  },
2795
  {
2796
  "epoch": 14.53,
2797
  "learning_rate": 2.7716535433070867e-05,
2798
+ "loss": 0.0424,
2799
  "step": 465
2800
  },
2801
  {
2802
  "epoch": 14.56,
2803
  "learning_rate": 2.755905511811024e-05,
2804
+ "loss": 0.0381,
2805
  "step": 466
2806
  },
2807
  {
2808
  "epoch": 14.59,
2809
  "learning_rate": 2.740157480314961e-05,
2810
+ "loss": 0.032,
2811
  "step": 467
2812
  },
2813
  {
2814
  "epoch": 14.62,
2815
  "learning_rate": 2.7244094488188977e-05,
2816
+ "loss": 0.027,
2817
  "step": 468
2818
  },
2819
  {
2820
  "epoch": 14.66,
2821
  "learning_rate": 2.708661417322835e-05,
2822
+ "loss": 0.0295,
2823
  "step": 469
2824
  },
2825
  {
2826
  "epoch": 14.69,
2827
  "learning_rate": 2.6929133858267717e-05,
2828
+ "loss": 0.0283,
2829
  "step": 470
2830
  },
2831
  {
2832
  "epoch": 14.72,
2833
  "learning_rate": 2.677165354330709e-05,
2834
+ "loss": 0.0292,
2835
  "step": 471
2836
  },
2837
  {
2838
  "epoch": 14.75,
2839
  "learning_rate": 2.6614173228346455e-05,
2840
+ "loss": 0.0368,
2841
  "step": 472
2842
  },
2843
  {
2844
  "epoch": 14.78,
2845
  "learning_rate": 2.6456692913385827e-05,
2846
+ "loss": 0.04,
2847
  "step": 473
2848
  },
2849
  {
2850
  "epoch": 14.81,
2851
  "learning_rate": 2.62992125984252e-05,
2852
+ "loss": 0.0361,
2853
  "step": 474
2854
  },
2855
  {
2856
  "epoch": 14.84,
2857
  "learning_rate": 2.614173228346457e-05,
2858
+ "loss": 0.031,
2859
  "step": 475
2860
  },
2861
  {
2862
  "epoch": 14.88,
2863
  "learning_rate": 2.5984251968503937e-05,
2864
+ "loss": 0.0318,
2865
  "step": 476
2866
  },
2867
  {
2868
  "epoch": 14.91,
2869
  "learning_rate": 2.582677165354331e-05,
2870
+ "loss": 0.0329,
2871
  "step": 477
2872
  },
2873
  {
2874
  "epoch": 14.94,
2875
  "learning_rate": 2.5669291338582678e-05,
2876
+ "loss": 0.0322,
2877
  "step": 478
2878
  },
2879
  {
2880
  "epoch": 14.97,
2881
  "learning_rate": 2.551181102362205e-05,
2882
+ "loss": 0.0262,
2883
  "step": 479
2884
  },
2885
  {
2886
  "epoch": 15.0,
2887
  "learning_rate": 2.5354330708661416e-05,
2888
+ "loss": 0.0279,
2889
  "step": 480
2890
  },
2891
  {
2892
  "epoch": 15.03,
2893
  "learning_rate": 2.5196850393700788e-05,
2894
+ "loss": 0.0291,
2895
  "step": 481
2896
  },
2897
  {
2898
  "epoch": 15.06,
2899
  "learning_rate": 2.503937007874016e-05,
2900
+ "loss": 0.0281,
2901
  "step": 482
2902
  },
2903
  {
2904
  "epoch": 15.09,
2905
  "learning_rate": 2.488188976377953e-05,
2906
+ "loss": 0.0301,
2907
  "step": 483
2908
  },
2909
  {
2910
  "epoch": 15.12,
2911
  "learning_rate": 2.47244094488189e-05,
2912
+ "loss": 0.0269,
2913
  "step": 484
2914
  },
2915
  {
2916
  "epoch": 15.16,
2917
  "learning_rate": 2.456692913385827e-05,
2918
+ "loss": 0.026,
2919
  "step": 485
2920
  },
2921
  {
2922
  "epoch": 15.19,
2923
  "learning_rate": 2.440944881889764e-05,
2924
+ "loss": 0.0273,
2925
  "step": 486
2926
  },
2927
  {
2928
  "epoch": 15.22,
2929
  "learning_rate": 2.4251968503937007e-05,
2930
+ "loss": 0.0281,
2931
  "step": 487
2932
  },
2933
  {
2934
  "epoch": 15.25,
2935
  "learning_rate": 2.409448818897638e-05,
2936
+ "loss": 0.0257,
2937
  "step": 488
2938
  },
2939
  {
2940
  "epoch": 15.28,
2941
  "learning_rate": 2.3937007874015748e-05,
2942
+ "loss": 0.0281,
2943
  "step": 489
2944
  },
2945
  {
2946
  "epoch": 15.31,
2947
  "learning_rate": 2.377952755905512e-05,
2948
+ "loss": 0.0313,
2949
  "step": 490
2950
  },
2951
  {
2952
  "epoch": 15.34,
2953
  "learning_rate": 2.362204724409449e-05,
2954
+ "loss": 0.0376,
2955
  "step": 491
2956
  },
2957
  {
2958
  "epoch": 15.38,
2959
  "learning_rate": 2.346456692913386e-05,
2960
+ "loss": 0.0304,
2961
  "step": 492
2962
  },
2963
  {
2964
  "epoch": 15.41,
2965
  "learning_rate": 2.330708661417323e-05,
2966
+ "loss": 0.0221,
2967
  "step": 493
2968
  },
2969
  {
2970
  "epoch": 15.44,
2971
  "learning_rate": 2.3149606299212602e-05,
2972
+ "loss": 0.0289,
2973
  "step": 494
2974
  },
2975
  {
2976
  "epoch": 15.47,
2977
  "learning_rate": 2.2992125984251968e-05,
2978
+ "loss": 0.0281,
2979
  "step": 495
2980
  },
2981
  {
2982
  "epoch": 15.5,
2983
  "learning_rate": 2.283464566929134e-05,
2984
+ "loss": 0.0272,
2985
  "step": 496
2986
  },
2987
  {
2988
  "epoch": 15.53,
2989
  "learning_rate": 2.267716535433071e-05,
2990
+ "loss": 0.022,
2991
  "step": 497
2992
  },
2993
  {
2994
  "epoch": 15.56,
2995
  "learning_rate": 2.251968503937008e-05,
2996
+ "loss": 0.0327,
2997
  "step": 498
2998
  },
2999
  {
3000
  "epoch": 15.59,
3001
  "learning_rate": 2.236220472440945e-05,
3002
+ "loss": 0.0328,
3003
  "step": 499
3004
  },
3005
  {
3006
  "epoch": 15.62,
3007
  "learning_rate": 2.2204724409448822e-05,
3008
+ "loss": 0.0329,
3009
  "step": 500
3010
  },
3011
  {
3012
  "epoch": 15.66,
3013
  "learning_rate": 2.204724409448819e-05,
3014
+ "loss": 0.0322,
3015
  "step": 501
3016
  },
3017
  {
3018
  "epoch": 15.69,
3019
  "learning_rate": 2.1889763779527563e-05,
3020
+ "loss": 0.0317,
3021
  "step": 502
3022
  },
3023
  {
3024
  "epoch": 15.72,
3025
  "learning_rate": 2.173228346456693e-05,
3026
+ "loss": 0.0261,
3027
  "step": 503
3028
  },
3029
  {
3030
  "epoch": 15.75,
3031
  "learning_rate": 2.15748031496063e-05,
3032
+ "loss": 0.0342,
3033
  "step": 504
3034
  },
3035
  {
3036
  "epoch": 15.78,
3037
  "learning_rate": 2.141732283464567e-05,
3038
+ "loss": 0.0308,
3039
  "step": 505
3040
  },
3041
  {
3042
  "epoch": 15.81,
3043
  "learning_rate": 2.125984251968504e-05,
3044
+ "loss": 0.0291,
3045
  "step": 506
3046
  },
3047
  {
3048
  "epoch": 15.84,
3049
  "learning_rate": 2.110236220472441e-05,
3050
+ "loss": 0.0301,
3051
  "step": 507
3052
  },
3053
  {
3054
  "epoch": 15.88,
3055
  "learning_rate": 2.0944881889763782e-05,
3056
+ "loss": 0.0301,
3057
  "step": 508
3058
  },
3059
  {
3060
  "epoch": 15.91,
3061
  "learning_rate": 2.078740157480315e-05,
3062
+ "loss": 0.0271,
3063
  "step": 509
3064
  },
3065
  {
3066
  "epoch": 15.94,
3067
  "learning_rate": 2.0629921259842523e-05,
3068
+ "loss": 0.0314,
3069
  "step": 510
3070
  },
3071
  {
3072
  "epoch": 15.97,
3073
  "learning_rate": 2.0472440944881892e-05,
3074
+ "loss": 0.0294,
3075
  "step": 511
3076
  },
3077
  {
3078
  "epoch": 16.0,
3079
  "learning_rate": 2.031496062992126e-05,
3080
+ "loss": 0.0293,
3081
  "step": 512
3082
  },
3083
  {
3084
  "epoch": 16.03,
3085
  "learning_rate": 2.015748031496063e-05,
3086
+ "loss": 0.0267,
3087
  "step": 513
3088
  },
3089
  {
3090
  "epoch": 16.06,
3091
  "learning_rate": 2e-05,
3092
+ "loss": 0.0228,
3093
  "step": 514
3094
  },
3095
  {
3096
  "epoch": 16.09,
3097
  "learning_rate": 1.984251968503937e-05,
3098
+ "loss": 0.0275,
3099
  "step": 515
3100
  },
3101
  {
3102
  "epoch": 16.12,
3103
  "learning_rate": 1.9685039370078743e-05,
3104
+ "loss": 0.0269,
3105
  "step": 516
3106
  },
3107
  {
3108
  "epoch": 16.16,
3109
  "learning_rate": 1.952755905511811e-05,
3110
+ "loss": 0.0225,
3111
  "step": 517
3112
  },
3113
  {
3114
  "epoch": 16.19,
3115
  "learning_rate": 1.9370078740157484e-05,
3116
+ "loss": 0.0293,
3117
  "step": 518
3118
  },
3119
  {
3120
  "epoch": 16.22,
3121
  "learning_rate": 1.9212598425196852e-05,
3122
+ "loss": 0.0313,
3123
  "step": 519
3124
  },
3125
  {
3126
  "epoch": 16.25,
3127
  "learning_rate": 1.905511811023622e-05,
3128
+ "loss": 0.0259,
3129
  "step": 520
3130
  },
3131
  {
3132
  "epoch": 16.28,
3133
  "learning_rate": 1.889763779527559e-05,
3134
+ "loss": 0.0325,
3135
  "step": 521
3136
  },
3137
  {
3138
  "epoch": 16.31,
3139
  "learning_rate": 1.8740157480314962e-05,
3140
+ "loss": 0.0275,
3141
  "step": 522
3142
  },
3143
  {
3144
  "epoch": 16.34,
3145
  "learning_rate": 1.858267716535433e-05,
3146
+ "loss": 0.0307,
3147
  "step": 523
3148
  },
3149
  {
3150
  "epoch": 16.38,
3151
  "learning_rate": 1.8425196850393703e-05,
3152
+ "loss": 0.0292,
3153
  "step": 524
3154
  },
3155
  {
3156
  "epoch": 16.41,
3157
  "learning_rate": 1.8267716535433072e-05,
3158
+ "loss": 0.0298,
3159
  "step": 525
3160
  },
3161
  {
3162
  "epoch": 16.44,
3163
  "learning_rate": 1.8110236220472444e-05,
3164
+ "loss": 0.0247,
3165
  "step": 526
3166
  },
3167
  {
3168
  "epoch": 16.47,
3169
  "learning_rate": 1.7952755905511813e-05,
3170
+ "loss": 0.0267,
3171
  "step": 527
3172
  },
3173
  {
3174
  "epoch": 16.5,
3175
  "learning_rate": 1.779527559055118e-05,
3176
+ "loss": 0.0272,
3177
  "step": 528
3178
  },
3179
  {
3180
  "epoch": 16.53,
3181
  "learning_rate": 1.763779527559055e-05,
3182
+ "loss": 0.0334,
3183
  "step": 529
3184
  },
3185
  {
3186
  "epoch": 16.56,
3187
  "learning_rate": 1.7480314960629923e-05,
3188
+ "loss": 0.0277,
3189
  "step": 530
3190
  },
3191
  {
3192
  "epoch": 16.59,
3193
  "learning_rate": 1.732283464566929e-05,
3194
+ "loss": 0.0313,
3195
  "step": 531
3196
  },
3197
  {
3198
  "epoch": 16.62,
3199
  "learning_rate": 1.7165354330708663e-05,
3200
+ "loss": 0.0261,
3201
  "step": 532
3202
  },
3203
  {
3204
  "epoch": 16.66,
3205
  "learning_rate": 1.7007874015748032e-05,
3206
+ "loss": 0.0299,
3207
  "step": 533
3208
  },
3209
  {
3210
  "epoch": 16.69,
3211
  "learning_rate": 1.6850393700787404e-05,
3212
+ "loss": 0.0238,
3213
  "step": 534
3214
  },
3215
  {
3216
  "epoch": 16.72,
3217
  "learning_rate": 1.6692913385826773e-05,
3218
+ "loss": 0.0264,
3219
  "step": 535
3220
  },
3221
  {
3222
  "epoch": 16.75,
3223
  "learning_rate": 1.6535433070866142e-05,
3224
+ "loss": 0.0292,
3225
  "step": 536
3226
  },
3227
  {
3228
  "epoch": 16.78,
3229
  "learning_rate": 1.637795275590551e-05,
3230
+ "loss": 0.0255,
3231
  "step": 537
3232
  },
3233
  {
3234
  "epoch": 16.81,
3235
  "learning_rate": 1.6220472440944883e-05,
3236
+ "loss": 0.0328,
3237
  "step": 538
3238
  },
3239
  {
3240
  "epoch": 16.84,
3241
  "learning_rate": 1.6062992125984252e-05,
3242
+ "loss": 0.0309,
3243
  "step": 539
3244
  },
3245
  {
3246
  "epoch": 16.88,
3247
  "learning_rate": 1.5905511811023624e-05,
3248
+ "loss": 0.0287,
3249
  "step": 540
3250
  },
3251
  {
3252
  "epoch": 16.91,
3253
  "learning_rate": 1.5748031496062993e-05,
3254
+ "loss": 0.0261,
3255
  "step": 541
3256
  },
3257
  {
3258
  "epoch": 16.94,
3259
  "learning_rate": 1.5590551181102365e-05,
3260
+ "loss": 0.0323,
3261
  "step": 542
3262
  },
3263
  {
3264
  "epoch": 16.97,
3265
  "learning_rate": 1.5433070866141734e-05,
3266
+ "loss": 0.0248,
3267
  "step": 543
3268
  },
3269
  {
3270
  "epoch": 17.0,
3271
  "learning_rate": 1.5275590551181106e-05,
3272
+ "loss": 0.0305,
3273
  "step": 544
3274
  },
3275
  {
3276
  "epoch": 17.03,
3277
  "learning_rate": 1.5118110236220473e-05,
3278
+ "loss": 0.0231,
3279
  "step": 545
3280
  },
3281
  {
3282
  "epoch": 17.06,
3283
  "learning_rate": 1.4960629921259845e-05,
3284
+ "loss": 0.0244,
3285
  "step": 546
3286
  },
3287
  {
3288
  "epoch": 17.09,
3289
  "learning_rate": 1.4803149606299214e-05,
3290
+ "loss": 0.03,
3291
  "step": 547
3292
  },
3293
  {
3294
  "epoch": 17.12,
3295
  "learning_rate": 1.4645669291338584e-05,
3296
+ "loss": 0.026,
3297
  "step": 548
3298
  },
3299
  {
3300
  "epoch": 17.16,
3301
  "learning_rate": 1.4488188976377953e-05,
3302
+ "loss": 0.0297,
3303
  "step": 549
3304
  },
3305
  {
3306
  "epoch": 17.19,
3307
  "learning_rate": 1.4330708661417325e-05,
3308
+ "loss": 0.0274,
3309
  "step": 550
3310
  },
3311
  {
3312
  "epoch": 17.22,
3313
  "learning_rate": 1.4173228346456694e-05,
3314
+ "loss": 0.0258,
3315
  "step": 551
3316
  },
3317
  {
3318
  "epoch": 17.25,
3319
  "learning_rate": 1.4015748031496065e-05,
3320
+ "loss": 0.0304,
3321
  "step": 552
3322
  },
3323
  {
3324
  "epoch": 17.28,
3325
  "learning_rate": 1.3858267716535433e-05,
3326
+ "loss": 0.0312,
3327
  "step": 553
3328
  },
3329
  {
3330
  "epoch": 17.31,
3331
  "learning_rate": 1.3700787401574806e-05,
3332
+ "loss": 0.0287,
3333
  "step": 554
3334
  },
3335
  {
3336
  "epoch": 17.34,
3337
  "learning_rate": 1.3543307086614174e-05,
3338
+ "loss": 0.0236,
3339
  "step": 555
3340
  },
3341
  {
3342
  "epoch": 17.38,
3343
  "learning_rate": 1.3385826771653545e-05,
3344
+ "loss": 0.0264,
3345
  "step": 556
3346
  },
3347
  {
3348
  "epoch": 17.41,
3349
  "learning_rate": 1.3228346456692914e-05,
3350
+ "loss": 0.0299,
3351
  "step": 557
3352
  },
3353
  {
3354
  "epoch": 17.44,
3355
  "learning_rate": 1.3070866141732286e-05,
3356
+ "loss": 0.0246,
3357
  "step": 558
3358
  },
3359
  {
3360
  "epoch": 17.47,
3361
  "learning_rate": 1.2913385826771655e-05,
3362
+ "loss": 0.029,
3363
  "step": 559
3364
  },
3365
  {
3366
  "epoch": 17.5,
3367
  "learning_rate": 1.2755905511811025e-05,
3368
+ "loss": 0.0267,
3369
  "step": 560
3370
  },
3371
  {
3372
  "epoch": 17.53,
3373
  "learning_rate": 1.2598425196850394e-05,
3374
+ "loss": 0.0239,
3375
  "step": 561
3376
  },
3377
  {
3378
  "epoch": 17.56,
3379
  "learning_rate": 1.2440944881889764e-05,
3380
+ "loss": 0.0241,
3381
  "step": 562
3382
  },
3383
  {
3384
  "epoch": 17.59,
3385
  "learning_rate": 1.2283464566929135e-05,
3386
+ "loss": 0.0274,
3387
  "step": 563
3388
  },
3389
  {
3390
  "epoch": 17.62,
3391
  "learning_rate": 1.2125984251968504e-05,
3392
+ "loss": 0.0309,
3393
  "step": 564
3394
  },
3395
  {
3396
  "epoch": 17.66,
3397
  "learning_rate": 1.1968503937007874e-05,
3398
+ "loss": 0.0283,
3399
  "step": 565
3400
  },
3401
  {
3402
  "epoch": 17.69,
3403
  "learning_rate": 1.1811023622047245e-05,
3404
+ "loss": 0.0238,
3405
  "step": 566
3406
  },
3407
  {
3408
  "epoch": 17.72,
3409
  "learning_rate": 1.1653543307086615e-05,
3410
+ "loss": 0.0267,
3411
  "step": 567
3412
  },
3413
  {
3414
  "epoch": 17.75,
3415
  "learning_rate": 1.1496062992125984e-05,
3416
+ "loss": 0.024,
3417
  "step": 568
3418
  },
3419
  {
3420
  "epoch": 17.78,
3421
  "learning_rate": 1.1338582677165354e-05,
3422
+ "loss": 0.0289,
3423
  "step": 569
3424
  },
3425
  {
3426
  "epoch": 17.81,
3427
  "learning_rate": 1.1181102362204725e-05,
3428
+ "loss": 0.03,
3429
  "step": 570
3430
  },
3431
  {
3432
  "epoch": 17.84,
3433
  "learning_rate": 1.1023622047244095e-05,
3434
+ "loss": 0.0342,
3435
  "step": 571
3436
  },
3437
  {
3438
  "epoch": 17.88,
3439
  "learning_rate": 1.0866141732283466e-05,
3440
+ "loss": 0.0307,
3441
  "step": 572
3442
  },
3443
  {
3444
  "epoch": 17.91,
3445
  "learning_rate": 1.0708661417322835e-05,
3446
+ "loss": 0.0294,
3447
  "step": 573
3448
  },
3449
  {
3450
  "epoch": 17.94,
3451
  "learning_rate": 1.0551181102362205e-05,
3452
+ "loss": 0.0284,
3453
  "step": 574
3454
  },
3455
  {
3456
  "epoch": 17.97,
3457
  "learning_rate": 1.0393700787401575e-05,
3458
+ "loss": 0.031,
3459
  "step": 575
3460
  },
3461
  {
3462
  "epoch": 18.0,
3463
  "learning_rate": 1.0236220472440946e-05,
3464
+ "loss": 0.0263,
3465
  "step": 576
3466
  },
3467
  {
3468
  "epoch": 18.03,
3469
  "learning_rate": 1.0078740157480315e-05,
3470
+ "loss": 0.0224,
3471
  "step": 577
3472
  },
3473
  {
3474
  "epoch": 18.06,
3475
  "learning_rate": 9.921259842519685e-06,
3476
+ "loss": 0.024,
3477
  "step": 578
3478
  },
3479
  {
3480
  "epoch": 18.09,
3481
  "learning_rate": 9.763779527559056e-06,
3482
+ "loss": 0.0274,
3483
  "step": 579
3484
  },
3485
  {
3486
  "epoch": 18.12,
3487
  "learning_rate": 9.606299212598426e-06,
3488
+ "loss": 0.025,
3489
  "step": 580
3490
  },
3491
  {
3492
  "epoch": 18.16,
3493
  "learning_rate": 9.448818897637795e-06,
3494
+ "loss": 0.0244,
3495
  "step": 581
3496
  },
3497
  {
3498
  "epoch": 18.19,
3499
  "learning_rate": 9.291338582677165e-06,
3500
+ "loss": 0.0253,
3501
  "step": 582
3502
  },
3503
  {
3504
  "epoch": 18.22,
3505
  "learning_rate": 9.133858267716536e-06,
3506
+ "loss": 0.0238,
3507
  "step": 583
3508
  },
3509
  {
3510
  "epoch": 18.25,
3511
  "learning_rate": 8.976377952755906e-06,
3512
+ "loss": 0.0265,
3513
  "step": 584
3514
  },
3515
  {
3516
  "epoch": 18.28,
3517
  "learning_rate": 8.818897637795275e-06,
3518
+ "loss": 0.0278,
3519
  "step": 585
3520
  },
3521
  {
3522
  "epoch": 18.31,
3523
  "learning_rate": 8.661417322834646e-06,
3524
+ "loss": 0.0242,
3525
  "step": 586
3526
  },
3527
  {
3528
  "epoch": 18.34,
3529
  "learning_rate": 8.503937007874016e-06,
3530
+ "loss": 0.026,
3531
  "step": 587
3532
  },
3533
  {
3534
  "epoch": 18.38,
3535
  "learning_rate": 8.346456692913387e-06,
3536
+ "loss": 0.0196,
3537
  "step": 588
3538
  },
3539
  {
3540
  "epoch": 18.41,
3541
  "learning_rate": 8.188976377952755e-06,
3542
+ "loss": 0.0245,
3543
  "step": 589
3544
  },
3545
  {
3546
  "epoch": 18.44,
3547
  "learning_rate": 8.031496062992126e-06,
3548
+ "loss": 0.0259,
3549
  "step": 590
3550
  },
3551
  {
3552
  "epoch": 18.47,
3553
  "learning_rate": 7.874015748031496e-06,
3554
+ "loss": 0.033,
3555
  "step": 591
3556
  },
3557
  {
3558
  "epoch": 18.5,
3559
  "learning_rate": 7.716535433070867e-06,
3560
+ "loss": 0.0291,
3561
  "step": 592
3562
  },
3563
  {
3564
  "epoch": 18.53,
3565
  "learning_rate": 7.5590551181102365e-06,
3566
+ "loss": 0.0272,
3567
  "step": 593
3568
  },
3569
  {
3570
  "epoch": 18.56,
3571
  "learning_rate": 7.401574803149607e-06,
3572
+ "loss": 0.0266,
3573
  "step": 594
3574
  },
3575
  {
3576
  "epoch": 18.59,
3577
  "learning_rate": 7.244094488188977e-06,
3578
+ "loss": 0.0274,
3579
  "step": 595
3580
  },
3581
  {
3582
  "epoch": 18.62,
3583
  "learning_rate": 7.086614173228347e-06,
3584
+ "loss": 0.0261,
3585
  "step": 596
3586
  },
3587
  {
3588
  "epoch": 18.66,
3589
  "learning_rate": 6.929133858267717e-06,
3590
+ "loss": 0.0259,
3591
  "step": 597
3592
  },
3593
  {
3594
  "epoch": 18.69,
3595
  "learning_rate": 6.771653543307087e-06,
3596
+ "loss": 0.0328,
3597
  "step": 598
3598
  },
3599
  {
3600
  "epoch": 18.72,
3601
  "learning_rate": 6.614173228346457e-06,
3602
+ "loss": 0.0321,
3603
  "step": 599
3604
  },
3605
  {
3606
  "epoch": 18.75,
3607
  "learning_rate": 6.456692913385827e-06,
3608
+ "loss": 0.0283,
3609
  "step": 600
3610
  },
3611
  {
3612
  "epoch": 18.78,
3613
  "learning_rate": 6.299212598425197e-06,
3614
+ "loss": 0.0253,
3615
  "step": 601
3616
  },
3617
  {
3618
  "epoch": 18.81,
3619
  "learning_rate": 6.141732283464567e-06,
3620
+ "loss": 0.0346,
3621
  "step": 602
3622
  },
3623
  {
3624
  "epoch": 18.84,
3625
  "learning_rate": 5.984251968503937e-06,
3626
+ "loss": 0.0264,
3627
  "step": 603
3628
  },
3629
  {
3630
  "epoch": 18.88,
3631
  "learning_rate": 5.8267716535433075e-06,
3632
+ "loss": 0.026,
3633
  "step": 604
3634
  },
3635
  {
3636
  "epoch": 18.91,
3637
  "learning_rate": 5.669291338582677e-06,
3638
+ "loss": 0.0303,
3639
  "step": 605
3640
  },
3641
  {
3642
  "epoch": 18.94,
3643
  "learning_rate": 5.511811023622048e-06,
3644
+ "loss": 0.0326,
3645
  "step": 606
3646
  },
3647
  {
3648
  "epoch": 18.97,
3649
  "learning_rate": 5.354330708661417e-06,
3650
+ "loss": 0.0302,
3651
  "step": 607
3652
  },
3653
  {
3654
  "epoch": 19.0,
3655
  "learning_rate": 5.196850393700788e-06,
3656
+ "loss": 0.0288,
3657
  "step": 608
3658
  },
3659
  {
3660
  "epoch": 19.03,
3661
  "learning_rate": 5.039370078740157e-06,
3662
+ "loss": 0.0279,
3663
  "step": 609
3664
  },
3665
  {
3666
  "epoch": 19.06,
3667
  "learning_rate": 4.881889763779528e-06,
3668
+ "loss": 0.0207,
3669
  "step": 610
3670
  },
3671
  {
3672
  "epoch": 19.09,
3673
  "learning_rate": 4.7244094488188975e-06,
3674
+ "loss": 0.0282,
3675
  "step": 611
3676
  },
3677
  {
3678
  "epoch": 19.12,
3679
  "learning_rate": 4.566929133858268e-06,
3680
+ "loss": 0.0234,
3681
  "step": 612
3682
  },
3683
  {
3684
  "epoch": 19.16,
3685
  "learning_rate": 4.409448818897638e-06,
3686
+ "loss": 0.0305,
3687
  "step": 613
3688
  },
3689
  {
3690
  "epoch": 19.19,
3691
  "learning_rate": 4.251968503937008e-06,
3692
+ "loss": 0.0269,
3693
  "step": 614
3694
  },
3695
  {
3696
  "epoch": 19.22,
3697
  "learning_rate": 4.094488188976378e-06,
3698
+ "loss": 0.0254,
3699
  "step": 615
3700
  },
3701
  {
3702
  "epoch": 19.25,
3703
  "learning_rate": 3.937007874015748e-06,
3704
+ "loss": 0.029,
3705
  "step": 616
3706
  },
3707
  {
3708
  "epoch": 19.28,
3709
  "learning_rate": 3.7795275590551182e-06,
3710
+ "loss": 0.0275,
3711
  "step": 617
3712
  },
3713
  {
3714
  "epoch": 19.31,
3715
  "learning_rate": 3.6220472440944883e-06,
3716
+ "loss": 0.024,
3717
  "step": 618
3718
  },
3719
  {
3720
  "epoch": 19.34,
3721
  "learning_rate": 3.4645669291338583e-06,
3722
+ "loss": 0.0265,
3723
  "step": 619
3724
  },
3725
  {
3726
  "epoch": 19.38,
3727
  "learning_rate": 3.3070866141732284e-06,
3728
+ "loss": 0.0235,
3729
  "step": 620
3730
  },
3731
  {
3732
  "epoch": 19.41,
3733
  "learning_rate": 3.1496062992125985e-06,
3734
+ "loss": 0.0288,
3735
  "step": 621
3736
  },
3737
  {
3738
  "epoch": 19.44,
3739
  "learning_rate": 2.9921259842519685e-06,
3740
+ "loss": 0.0296,
3741
  "step": 622
3742
  },
3743
  {
3744
  "epoch": 19.47,
3745
  "learning_rate": 2.8346456692913386e-06,
3746
+ "loss": 0.0247,
3747
  "step": 623
3748
  },
3749
  {
3750
  "epoch": 19.5,
3751
  "learning_rate": 2.6771653543307086e-06,
3752
+ "loss": 0.0282,
3753
  "step": 624
3754
  },
3755
  {
3756
  "epoch": 19.53,
3757
  "learning_rate": 2.5196850393700787e-06,
3758
+ "loss": 0.0309,
3759
  "step": 625
3760
  },
3761
  {
3762
  "epoch": 19.56,
3763
  "learning_rate": 2.3622047244094487e-06,
3764
+ "loss": 0.0215,
3765
  "step": 626
3766
  },
3767
  {
3768
  "epoch": 19.59,
3769
  "learning_rate": 2.204724409448819e-06,
3770
+ "loss": 0.0312,
3771
  "step": 627
3772
  },
3773
  {
3774
  "epoch": 19.62,
3775
  "learning_rate": 2.047244094488189e-06,
3776
+ "loss": 0.0225,
3777
  "step": 628
3778
  },
3779
  {
3780
  "epoch": 19.66,
3781
  "learning_rate": 1.8897637795275591e-06,
3782
+ "loss": 0.0255,
3783
  "step": 629
3784
  },
3785
  {
3786
  "epoch": 19.69,
3787
  "learning_rate": 1.7322834645669292e-06,
3788
+ "loss": 0.0265,
3789
  "step": 630
3790
  },
3791
  {
3792
  "epoch": 19.72,
3793
  "learning_rate": 1.5748031496062992e-06,
3794
+ "loss": 0.0319,
3795
  "step": 631
3796
  },
3797
  {
3798
  "epoch": 19.75,
3799
  "learning_rate": 1.4173228346456693e-06,
3800
+ "loss": 0.0267,
3801
  "step": 632
3802
  },
3803
  {
3804
  "epoch": 19.78,
3805
  "learning_rate": 1.2598425196850393e-06,
3806
+ "loss": 0.0254,
3807
  "step": 633
3808
  },
3809
  {
3810
  "epoch": 19.81,
3811
  "learning_rate": 1.1023622047244094e-06,
3812
+ "loss": 0.0237,
3813
  "step": 634
3814
  },
3815
  {
3816
  "epoch": 19.84,
3817
  "learning_rate": 9.448818897637796e-07,
3818
+ "loss": 0.0253,
3819
  "step": 635
3820
  },
3821
  {
3822
  "epoch": 19.88,
3823
  "learning_rate": 7.874015748031496e-07,
3824
+ "loss": 0.0236,
3825
  "step": 636
3826
  },
3827
  {
3828
  "epoch": 19.91,
3829
  "learning_rate": 6.299212598425197e-07,
3830
+ "loss": 0.0303,
3831
  "step": 637
3832
  },
3833
  {
3834
  "epoch": 19.94,
3835
  "learning_rate": 4.724409448818898e-07,
3836
+ "loss": 0.0232,
3837
  "step": 638
3838
  },
3839
  {
3840
  "epoch": 19.97,
3841
  "learning_rate": 3.1496062992125984e-07,
3842
+ "loss": 0.0314,
3843
  "step": 639
3844
  },
3845
  {
3846
  "epoch": 20.0,
3847
  "learning_rate": 1.5748031496062992e-07,
3848
+ "loss": 0.028,
3849
  "step": 640
3850
  },
3851
  {
3852
  "epoch": 20.0,
3853
  "step": 640,
3854
+ "total_flos": 4.839674807844864e+16,
3855
+ "train_loss": 0.2799676021706546,
3856
+ "train_runtime": 1494.9511,
3857
+ "train_samples_per_second": 1.712,
3858
+ "train_steps_per_second": 0.428
3859
  }
3860
  ],
3861
  "logging_steps": 1,
3862
  "max_steps": 640,
3863
  "num_train_epochs": 20,
3864
  "save_steps": 500,
3865
+ "total_flos": 4.839674807844864e+16,
3866
  "trial_name": null,
3867
  "trial_params": null
3868
  }