therealcyberlord commited on
Commit
81f244e
1 Parent(s): 88d72c8

Delete trainer_state.json

Browse files
Files changed (1) hide show
  1. trainer_state.json +0 -1741
trainer_state.json DELETED
@@ -1,1741 +0,0 @@
1
- {
2
- "best_metric": 0.7031954526901245,
3
- "best_model_checkpoint": "./vit-stanford-cars/checkpoint-2500",
4
- "epoch": 9.803921568627452,
5
- "global_step": 2500,
6
- "is_hyper_param_search": false,
7
- "is_local_process_zero": true,
8
- "is_world_process_zero": true,
9
- "log_history": [
10
- {
11
- "epoch": 0.04,
12
- "learning_rate": 1.6000000000000001e-06,
13
- "loss": 5.3806,
14
- "step": 10
15
- },
16
- {
17
- "epoch": 0.08,
18
- "learning_rate": 3.2000000000000003e-06,
19
- "loss": 5.4127,
20
- "step": 20
21
- },
22
- {
23
- "epoch": 0.12,
24
- "learning_rate": 4.6400000000000005e-06,
25
- "loss": 5.3923,
26
- "step": 30
27
- },
28
- {
29
- "epoch": 0.16,
30
- "learning_rate": 6.24e-06,
31
- "loss": 5.4098,
32
- "step": 40
33
- },
34
- {
35
- "epoch": 0.2,
36
- "learning_rate": 7.840000000000001e-06,
37
- "loss": 5.3607,
38
- "step": 50
39
- },
40
- {
41
- "epoch": 0.24,
42
- "learning_rate": 9.440000000000001e-06,
43
- "loss": 5.3706,
44
- "step": 60
45
- },
46
- {
47
- "epoch": 0.27,
48
- "learning_rate": 1.1040000000000001e-05,
49
- "loss": 5.3293,
50
- "step": 70
51
- },
52
- {
53
- "epoch": 0.31,
54
- "learning_rate": 1.2640000000000001e-05,
55
- "loss": 5.3037,
56
- "step": 80
57
- },
58
- {
59
- "epoch": 0.35,
60
- "learning_rate": 1.4240000000000001e-05,
61
- "loss": 5.2453,
62
- "step": 90
63
- },
64
- {
65
- "epoch": 0.39,
66
- "learning_rate": 1.584e-05,
67
- "loss": 5.2552,
68
- "step": 100
69
- },
70
- {
71
- "epoch": 0.39,
72
- "eval_accuracy": 0.0125,
73
- "eval_loss": 5.2306718826293945,
74
- "eval_runtime": 39.7246,
75
- "eval_samples_per_second": 50.347,
76
- "eval_steps_per_second": 1.586,
77
- "step": 100
78
- },
79
- {
80
- "epoch": 0.43,
81
- "learning_rate": 1.7440000000000002e-05,
82
- "loss": 5.1976,
83
- "step": 110
84
- },
85
- {
86
- "epoch": 0.47,
87
- "learning_rate": 1.904e-05,
88
- "loss": 5.1826,
89
- "step": 120
90
- },
91
- {
92
- "epoch": 0.51,
93
- "learning_rate": 2.0640000000000002e-05,
94
- "loss": 5.1389,
95
- "step": 130
96
- },
97
- {
98
- "epoch": 0.55,
99
- "learning_rate": 2.2240000000000004e-05,
100
- "loss": 5.0934,
101
- "step": 140
102
- },
103
- {
104
- "epoch": 0.59,
105
- "learning_rate": 2.3840000000000002e-05,
106
- "loss": 5.0457,
107
- "step": 150
108
- },
109
- {
110
- "epoch": 0.63,
111
- "learning_rate": 2.5440000000000004e-05,
112
- "loss": 5.0993,
113
- "step": 160
114
- },
115
- {
116
- "epoch": 0.67,
117
- "learning_rate": 2.7040000000000005e-05,
118
- "loss": 4.9771,
119
- "step": 170
120
- },
121
- {
122
- "epoch": 0.71,
123
- "learning_rate": 2.864e-05,
124
- "loss": 4.9128,
125
- "step": 180
126
- },
127
- {
128
- "epoch": 0.75,
129
- "learning_rate": 3.0240000000000002e-05,
130
- "loss": 4.8988,
131
- "step": 190
132
- },
133
- {
134
- "epoch": 0.78,
135
- "learning_rate": 3.184000000000001e-05,
136
- "loss": 4.7653,
137
- "step": 200
138
- },
139
- {
140
- "epoch": 0.78,
141
- "eval_accuracy": 0.079,
142
- "eval_loss": 4.759223461151123,
143
- "eval_runtime": 40.216,
144
- "eval_samples_per_second": 49.732,
145
- "eval_steps_per_second": 1.567,
146
- "step": 200
147
- },
148
- {
149
- "epoch": 0.82,
150
- "learning_rate": 3.344e-05,
151
- "loss": 4.7304,
152
- "step": 210
153
- },
154
- {
155
- "epoch": 0.86,
156
- "learning_rate": 3.504e-05,
157
- "loss": 4.7613,
158
- "step": 220
159
- },
160
- {
161
- "epoch": 0.9,
162
- "learning_rate": 3.664e-05,
163
- "loss": 4.6521,
164
- "step": 230
165
- },
166
- {
167
- "epoch": 0.94,
168
- "learning_rate": 3.824e-05,
169
- "loss": 4.6012,
170
- "step": 240
171
- },
172
- {
173
- "epoch": 0.98,
174
- "learning_rate": 3.9840000000000005e-05,
175
- "loss": 4.4954,
176
- "step": 250
177
- },
178
- {
179
- "epoch": 1.02,
180
- "learning_rate": 4.144e-05,
181
- "loss": 4.3733,
182
- "step": 260
183
- },
184
- {
185
- "epoch": 1.06,
186
- "learning_rate": 4.304000000000001e-05,
187
- "loss": 4.1471,
188
- "step": 270
189
- },
190
- {
191
- "epoch": 1.1,
192
- "learning_rate": 4.4640000000000006e-05,
193
- "loss": 4.1306,
194
- "step": 280
195
- },
196
- {
197
- "epoch": 1.14,
198
- "learning_rate": 4.624e-05,
199
- "loss": 4.0396,
200
- "step": 290
201
- },
202
- {
203
- "epoch": 1.18,
204
- "learning_rate": 4.784e-05,
205
- "loss": 3.9779,
206
- "step": 300
207
- },
208
- {
209
- "epoch": 1.18,
210
- "eval_accuracy": 0.193,
211
- "eval_loss": 4.070125579833984,
212
- "eval_runtime": 40.3837,
213
- "eval_samples_per_second": 49.525,
214
- "eval_steps_per_second": 1.56,
215
- "step": 300
216
- },
217
- {
218
- "epoch": 1.22,
219
- "learning_rate": 4.944e-05,
220
- "loss": 3.8729,
221
- "step": 310
222
- },
223
- {
224
- "epoch": 1.25,
225
- "learning_rate": 5.1040000000000006e-05,
226
- "loss": 3.7538,
227
- "step": 320
228
- },
229
- {
230
- "epoch": 1.29,
231
- "learning_rate": 5.2640000000000004e-05,
232
- "loss": 3.6952,
233
- "step": 330
234
- },
235
- {
236
- "epoch": 1.33,
237
- "learning_rate": 5.424000000000001e-05,
238
- "loss": 3.6388,
239
- "step": 340
240
- },
241
- {
242
- "epoch": 1.37,
243
- "learning_rate": 5.584e-05,
244
- "loss": 3.6777,
245
- "step": 350
246
- },
247
- {
248
- "epoch": 1.41,
249
- "learning_rate": 5.7440000000000006e-05,
250
- "loss": 3.5145,
251
- "step": 360
252
- },
253
- {
254
- "epoch": 1.45,
255
- "learning_rate": 5.9040000000000004e-05,
256
- "loss": 3.5315,
257
- "step": 370
258
- },
259
- {
260
- "epoch": 1.49,
261
- "learning_rate": 6.064e-05,
262
- "loss": 3.4288,
263
- "step": 380
264
- },
265
- {
266
- "epoch": 1.53,
267
- "learning_rate": 6.224e-05,
268
- "loss": 3.3541,
269
- "step": 390
270
- },
271
- {
272
- "epoch": 1.57,
273
- "learning_rate": 6.384000000000001e-05,
274
- "loss": 3.2754,
275
- "step": 400
276
- },
277
- {
278
- "epoch": 1.57,
279
- "eval_accuracy": 0.3305,
280
- "eval_loss": 3.3328330516815186,
281
- "eval_runtime": 40.7474,
282
- "eval_samples_per_second": 49.083,
283
- "eval_steps_per_second": 1.546,
284
- "step": 400
285
- },
286
- {
287
- "epoch": 1.61,
288
- "learning_rate": 6.544e-05,
289
- "loss": 3.1604,
290
- "step": 410
291
- },
292
- {
293
- "epoch": 1.65,
294
- "learning_rate": 6.704000000000001e-05,
295
- "loss": 3.1043,
296
- "step": 420
297
- },
298
- {
299
- "epoch": 1.69,
300
- "learning_rate": 6.864000000000001e-05,
301
- "loss": 2.9978,
302
- "step": 430
303
- },
304
- {
305
- "epoch": 1.73,
306
- "learning_rate": 7.024e-05,
307
- "loss": 2.9723,
308
- "step": 440
309
- },
310
- {
311
- "epoch": 1.76,
312
- "learning_rate": 7.184e-05,
313
- "loss": 2.9747,
314
- "step": 450
315
- },
316
- {
317
- "epoch": 1.8,
318
- "learning_rate": 7.344000000000002e-05,
319
- "loss": 2.882,
320
- "step": 460
321
- },
322
- {
323
- "epoch": 1.84,
324
- "learning_rate": 7.504e-05,
325
- "loss": 2.7142,
326
- "step": 470
327
- },
328
- {
329
- "epoch": 1.88,
330
- "learning_rate": 7.664e-05,
331
- "loss": 2.7157,
332
- "step": 480
333
- },
334
- {
335
- "epoch": 1.92,
336
- "learning_rate": 7.824000000000001e-05,
337
- "loss": 2.6695,
338
- "step": 490
339
- },
340
- {
341
- "epoch": 1.96,
342
- "learning_rate": 7.984000000000001e-05,
343
- "loss": 2.5607,
344
- "step": 500
345
- },
346
- {
347
- "epoch": 1.96,
348
- "eval_accuracy": 0.491,
349
- "eval_loss": 2.6118950843811035,
350
- "eval_runtime": 40.1814,
351
- "eval_samples_per_second": 49.774,
352
- "eval_steps_per_second": 1.568,
353
- "step": 500
354
- },
355
- {
356
- "epoch": 2.0,
357
- "learning_rate": 7.964878048780489e-05,
358
- "loss": 2.6025,
359
- "step": 510
360
- },
361
- {
362
- "epoch": 2.04,
363
- "learning_rate": 7.925853658536585e-05,
364
- "loss": 1.9918,
365
- "step": 520
366
- },
367
- {
368
- "epoch": 2.08,
369
- "learning_rate": 7.886829268292684e-05,
370
- "loss": 1.8799,
371
- "step": 530
372
- },
373
- {
374
- "epoch": 2.12,
375
- "learning_rate": 7.84780487804878e-05,
376
- "loss": 1.7169,
377
- "step": 540
378
- },
379
- {
380
- "epoch": 2.16,
381
- "learning_rate": 7.808780487804879e-05,
382
- "loss": 1.7148,
383
- "step": 550
384
- },
385
- {
386
- "epoch": 2.2,
387
- "learning_rate": 7.769756097560977e-05,
388
- "loss": 1.7091,
389
- "step": 560
390
- },
391
- {
392
- "epoch": 2.24,
393
- "learning_rate": 7.730731707317074e-05,
394
- "loss": 1.569,
395
- "step": 570
396
- },
397
- {
398
- "epoch": 2.27,
399
- "learning_rate": 7.691707317073172e-05,
400
- "loss": 1.5601,
401
- "step": 580
402
- },
403
- {
404
- "epoch": 2.31,
405
- "learning_rate": 7.652682926829269e-05,
406
- "loss": 1.5463,
407
- "step": 590
408
- },
409
- {
410
- "epoch": 2.35,
411
- "learning_rate": 7.613658536585367e-05,
412
- "loss": 1.5249,
413
- "step": 600
414
- },
415
- {
416
- "epoch": 2.35,
417
- "eval_accuracy": 0.61,
418
- "eval_loss": 2.0372462272644043,
419
- "eval_runtime": 40.501,
420
- "eval_samples_per_second": 49.381,
421
- "eval_steps_per_second": 1.556,
422
- "step": 600
423
- },
424
- {
425
- "epoch": 2.39,
426
- "learning_rate": 7.574634146341463e-05,
427
- "loss": 1.4028,
428
- "step": 610
429
- },
430
- {
431
- "epoch": 2.43,
432
- "learning_rate": 7.535609756097562e-05,
433
- "loss": 1.3612,
434
- "step": 620
435
- },
436
- {
437
- "epoch": 2.47,
438
- "learning_rate": 7.49658536585366e-05,
439
- "loss": 1.3848,
440
- "step": 630
441
- },
442
- {
443
- "epoch": 2.51,
444
- "learning_rate": 7.457560975609757e-05,
445
- "loss": 1.3545,
446
- "step": 640
447
- },
448
- {
449
- "epoch": 2.55,
450
- "learning_rate": 7.418536585365855e-05,
451
- "loss": 1.2483,
452
- "step": 650
453
- },
454
- {
455
- "epoch": 2.59,
456
- "learning_rate": 7.379512195121952e-05,
457
- "loss": 1.2979,
458
- "step": 660
459
- },
460
- {
461
- "epoch": 2.63,
462
- "learning_rate": 7.34048780487805e-05,
463
- "loss": 1.2087,
464
- "step": 670
465
- },
466
- {
467
- "epoch": 2.67,
468
- "learning_rate": 7.301463414634147e-05,
469
- "loss": 1.2661,
470
- "step": 680
471
- },
472
- {
473
- "epoch": 2.71,
474
- "learning_rate": 7.262439024390245e-05,
475
- "loss": 1.2181,
476
- "step": 690
477
- },
478
- {
479
- "epoch": 2.75,
480
- "learning_rate": 7.223414634146343e-05,
481
- "loss": 1.1915,
482
- "step": 700
483
- },
484
- {
485
- "epoch": 2.75,
486
- "eval_accuracy": 0.663,
487
- "eval_loss": 1.6082996129989624,
488
- "eval_runtime": 39.9953,
489
- "eval_samples_per_second": 50.006,
490
- "eval_steps_per_second": 1.575,
491
- "step": 700
492
- },
493
- {
494
- "epoch": 2.78,
495
- "learning_rate": 7.18439024390244e-05,
496
- "loss": 1.1685,
497
- "step": 710
498
- },
499
- {
500
- "epoch": 2.82,
501
- "learning_rate": 7.145365853658538e-05,
502
- "loss": 1.0873,
503
- "step": 720
504
- },
505
- {
506
- "epoch": 2.86,
507
- "learning_rate": 7.106341463414635e-05,
508
- "loss": 1.1013,
509
- "step": 730
510
- },
511
- {
512
- "epoch": 2.9,
513
- "learning_rate": 7.067317073170733e-05,
514
- "loss": 1.1133,
515
- "step": 740
516
- },
517
- {
518
- "epoch": 2.94,
519
- "learning_rate": 7.02829268292683e-05,
520
- "loss": 1.0955,
521
- "step": 750
522
- },
523
- {
524
- "epoch": 2.98,
525
- "learning_rate": 6.989268292682928e-05,
526
- "loss": 0.9469,
527
- "step": 760
528
- },
529
- {
530
- "epoch": 3.02,
531
- "learning_rate": 6.950243902439025e-05,
532
- "loss": 0.7376,
533
- "step": 770
534
- },
535
- {
536
- "epoch": 3.06,
537
- "learning_rate": 6.911219512195123e-05,
538
- "loss": 0.4818,
539
- "step": 780
540
- },
541
- {
542
- "epoch": 3.1,
543
- "learning_rate": 6.87219512195122e-05,
544
- "loss": 0.4825,
545
- "step": 790
546
- },
547
- {
548
- "epoch": 3.14,
549
- "learning_rate": 6.833170731707318e-05,
550
- "loss": 0.4414,
551
- "step": 800
552
- },
553
- {
554
- "epoch": 3.14,
555
- "eval_accuracy": 0.725,
556
- "eval_loss": 1.320522665977478,
557
- "eval_runtime": 40.1363,
558
- "eval_samples_per_second": 49.83,
559
- "eval_steps_per_second": 1.57,
560
- "step": 800
561
- },
562
- {
563
- "epoch": 3.18,
564
- "learning_rate": 6.794146341463415e-05,
565
- "loss": 0.4289,
566
- "step": 810
567
- },
568
- {
569
- "epoch": 3.22,
570
- "learning_rate": 6.755121951219513e-05,
571
- "loss": 0.3919,
572
- "step": 820
573
- },
574
- {
575
- "epoch": 3.25,
576
- "learning_rate": 6.71609756097561e-05,
577
- "loss": 0.3715,
578
- "step": 830
579
- },
580
- {
581
- "epoch": 3.29,
582
- "learning_rate": 6.677073170731708e-05,
583
- "loss": 0.3895,
584
- "step": 840
585
- },
586
- {
587
- "epoch": 3.33,
588
- "learning_rate": 6.638048780487805e-05,
589
- "loss": 0.3804,
590
- "step": 850
591
- },
592
- {
593
- "epoch": 3.37,
594
- "learning_rate": 6.599024390243903e-05,
595
- "loss": 0.3973,
596
- "step": 860
597
- },
598
- {
599
- "epoch": 3.41,
600
- "learning_rate": 6.56e-05,
601
- "loss": 0.3987,
602
- "step": 870
603
- },
604
- {
605
- "epoch": 3.45,
606
- "learning_rate": 6.520975609756098e-05,
607
- "loss": 0.3903,
608
- "step": 880
609
- },
610
- {
611
- "epoch": 3.49,
612
- "learning_rate": 6.481951219512196e-05,
613
- "loss": 0.3469,
614
- "step": 890
615
- },
616
- {
617
- "epoch": 3.53,
618
- "learning_rate": 6.442926829268294e-05,
619
- "loss": 0.323,
620
- "step": 900
621
- },
622
- {
623
- "epoch": 3.53,
624
- "eval_accuracy": 0.7615,
625
- "eval_loss": 1.1008645296096802,
626
- "eval_runtime": 41.4264,
627
- "eval_samples_per_second": 48.278,
628
- "eval_steps_per_second": 1.521,
629
- "step": 900
630
- },
631
- {
632
- "epoch": 3.57,
633
- "learning_rate": 6.403902439024391e-05,
634
- "loss": 0.3168,
635
- "step": 910
636
- },
637
- {
638
- "epoch": 3.61,
639
- "learning_rate": 6.364878048780489e-05,
640
- "loss": 0.3404,
641
- "step": 920
642
- },
643
- {
644
- "epoch": 3.65,
645
- "learning_rate": 6.325853658536586e-05,
646
- "loss": 0.3553,
647
- "step": 930
648
- },
649
- {
650
- "epoch": 3.69,
651
- "learning_rate": 6.286829268292684e-05,
652
- "loss": 0.3665,
653
- "step": 940
654
- },
655
- {
656
- "epoch": 3.73,
657
- "learning_rate": 6.247804878048781e-05,
658
- "loss": 0.2983,
659
- "step": 950
660
- },
661
- {
662
- "epoch": 3.76,
663
- "learning_rate": 6.208780487804879e-05,
664
- "loss": 0.2886,
665
- "step": 960
666
- },
667
- {
668
- "epoch": 3.8,
669
- "learning_rate": 6.169756097560976e-05,
670
- "loss": 0.2721,
671
- "step": 970
672
- },
673
- {
674
- "epoch": 3.84,
675
- "learning_rate": 6.130731707317074e-05,
676
- "loss": 0.2898,
677
- "step": 980
678
- },
679
- {
680
- "epoch": 3.88,
681
- "learning_rate": 6.0917073170731714e-05,
682
- "loss": 0.3222,
683
- "step": 990
684
- },
685
- {
686
- "epoch": 3.92,
687
- "learning_rate": 6.052682926829269e-05,
688
- "loss": 0.2993,
689
- "step": 1000
690
- },
691
- {
692
- "epoch": 3.92,
693
- "eval_accuracy": 0.766,
694
- "eval_loss": 1.0115569829940796,
695
- "eval_runtime": 40.1299,
696
- "eval_samples_per_second": 49.838,
697
- "eval_steps_per_second": 1.57,
698
- "step": 1000
699
- },
700
- {
701
- "epoch": 3.96,
702
- "learning_rate": 6.0136585365853664e-05,
703
- "loss": 0.2903,
704
- "step": 1010
705
- },
706
- {
707
- "epoch": 4.0,
708
- "learning_rate": 5.974634146341464e-05,
709
- "loss": 0.2707,
710
- "step": 1020
711
- },
712
- {
713
- "epoch": 4.04,
714
- "learning_rate": 5.9356097560975614e-05,
715
- "loss": 0.0953,
716
- "step": 1030
717
- },
718
- {
719
- "epoch": 4.08,
720
- "learning_rate": 5.896585365853659e-05,
721
- "loss": 0.1066,
722
- "step": 1040
723
- },
724
- {
725
- "epoch": 4.12,
726
- "learning_rate": 5.8575609756097564e-05,
727
- "loss": 0.0944,
728
- "step": 1050
729
- },
730
- {
731
- "epoch": 4.16,
732
- "learning_rate": 5.818536585365854e-05,
733
- "loss": 0.0943,
734
- "step": 1060
735
- },
736
- {
737
- "epoch": 4.2,
738
- "learning_rate": 5.779512195121951e-05,
739
- "loss": 0.0985,
740
- "step": 1070
741
- },
742
- {
743
- "epoch": 4.24,
744
- "learning_rate": 5.740487804878049e-05,
745
- "loss": 0.1034,
746
- "step": 1080
747
- },
748
- {
749
- "epoch": 4.27,
750
- "learning_rate": 5.701463414634146e-05,
751
- "loss": 0.1015,
752
- "step": 1090
753
- },
754
- {
755
- "epoch": 4.31,
756
- "learning_rate": 5.662439024390244e-05,
757
- "loss": 0.1024,
758
- "step": 1100
759
- },
760
- {
761
- "epoch": 4.31,
762
- "eval_accuracy": 0.7895,
763
- "eval_loss": 0.9027227759361267,
764
- "eval_runtime": 40.6004,
765
- "eval_samples_per_second": 49.261,
766
- "eval_steps_per_second": 1.552,
767
- "step": 1100
768
- },
769
- {
770
- "epoch": 4.35,
771
- "learning_rate": 5.623414634146341e-05,
772
- "loss": 0.0779,
773
- "step": 1110
774
- },
775
- {
776
- "epoch": 4.39,
777
- "learning_rate": 5.584390243902439e-05,
778
- "loss": 0.0754,
779
- "step": 1120
780
- },
781
- {
782
- "epoch": 4.43,
783
- "learning_rate": 5.545365853658537e-05,
784
- "loss": 0.0754,
785
- "step": 1130
786
- },
787
- {
788
- "epoch": 4.47,
789
- "learning_rate": 5.5063414634146344e-05,
790
- "loss": 0.0724,
791
- "step": 1140
792
- },
793
- {
794
- "epoch": 4.51,
795
- "learning_rate": 5.4673170731707326e-05,
796
- "loss": 0.0765,
797
- "step": 1150
798
- },
799
- {
800
- "epoch": 4.55,
801
- "learning_rate": 5.42829268292683e-05,
802
- "loss": 0.0854,
803
- "step": 1160
804
- },
805
- {
806
- "epoch": 4.59,
807
- "learning_rate": 5.3892682926829276e-05,
808
- "loss": 0.0619,
809
- "step": 1170
810
- },
811
- {
812
- "epoch": 4.63,
813
- "learning_rate": 5.350243902439025e-05,
814
- "loss": 0.0679,
815
- "step": 1180
816
- },
817
- {
818
- "epoch": 4.67,
819
- "learning_rate": 5.3112195121951225e-05,
820
- "loss": 0.0836,
821
- "step": 1190
822
- },
823
- {
824
- "epoch": 4.71,
825
- "learning_rate": 5.27219512195122e-05,
826
- "loss": 0.097,
827
- "step": 1200
828
- },
829
- {
830
- "epoch": 4.71,
831
- "eval_accuracy": 0.7825,
832
- "eval_loss": 0.8696740865707397,
833
- "eval_runtime": 39.9428,
834
- "eval_samples_per_second": 50.072,
835
- "eval_steps_per_second": 1.577,
836
- "step": 1200
837
- },
838
- {
839
- "epoch": 4.75,
840
- "learning_rate": 5.2331707317073175e-05,
841
- "loss": 0.0763,
842
- "step": 1210
843
- },
844
- {
845
- "epoch": 4.78,
846
- "learning_rate": 5.194146341463415e-05,
847
- "loss": 0.0863,
848
- "step": 1220
849
- },
850
- {
851
- "epoch": 4.82,
852
- "learning_rate": 5.1551219512195125e-05,
853
- "loss": 0.0827,
854
- "step": 1230
855
- },
856
- {
857
- "epoch": 4.86,
858
- "learning_rate": 5.11609756097561e-05,
859
- "loss": 0.0929,
860
- "step": 1240
861
- },
862
- {
863
- "epoch": 4.9,
864
- "learning_rate": 5.0770731707317075e-05,
865
- "loss": 0.0619,
866
- "step": 1250
867
- },
868
- {
869
- "epoch": 4.94,
870
- "learning_rate": 5.0380487804878056e-05,
871
- "loss": 0.0827,
872
- "step": 1260
873
- },
874
- {
875
- "epoch": 4.98,
876
- "learning_rate": 4.999024390243903e-05,
877
- "loss": 0.0694,
878
- "step": 1270
879
- },
880
- {
881
- "epoch": 5.02,
882
- "learning_rate": 4.9600000000000006e-05,
883
- "loss": 0.0603,
884
- "step": 1280
885
- },
886
- {
887
- "epoch": 5.06,
888
- "learning_rate": 4.920975609756098e-05,
889
- "loss": 0.0233,
890
- "step": 1290
891
- },
892
- {
893
- "epoch": 5.1,
894
- "learning_rate": 4.8819512195121956e-05,
895
- "loss": 0.0273,
896
- "step": 1300
897
- },
898
- {
899
- "epoch": 5.1,
900
- "eval_accuracy": 0.805,
901
- "eval_loss": 0.7953792810440063,
902
- "eval_runtime": 39.9571,
903
- "eval_samples_per_second": 50.054,
904
- "eval_steps_per_second": 1.577,
905
- "step": 1300
906
- },
907
- {
908
- "epoch": 5.14,
909
- "learning_rate": 4.842926829268293e-05,
910
- "loss": 0.0197,
911
- "step": 1310
912
- },
913
- {
914
- "epoch": 5.18,
915
- "learning_rate": 4.8039024390243906e-05,
916
- "loss": 0.0194,
917
- "step": 1320
918
- },
919
- {
920
- "epoch": 5.22,
921
- "learning_rate": 4.764878048780488e-05,
922
- "loss": 0.0326,
923
- "step": 1330
924
- },
925
- {
926
- "epoch": 5.25,
927
- "learning_rate": 4.7258536585365856e-05,
928
- "loss": 0.0367,
929
- "step": 1340
930
- },
931
- {
932
- "epoch": 5.29,
933
- "learning_rate": 4.686829268292683e-05,
934
- "loss": 0.0172,
935
- "step": 1350
936
- },
937
- {
938
- "epoch": 5.33,
939
- "learning_rate": 4.6478048780487805e-05,
940
- "loss": 0.0259,
941
- "step": 1360
942
- },
943
- {
944
- "epoch": 5.37,
945
- "learning_rate": 4.608780487804878e-05,
946
- "loss": 0.0176,
947
- "step": 1370
948
- },
949
- {
950
- "epoch": 5.41,
951
- "learning_rate": 4.5697560975609755e-05,
952
- "loss": 0.0233,
953
- "step": 1380
954
- },
955
- {
956
- "epoch": 5.45,
957
- "learning_rate": 4.530731707317073e-05,
958
- "loss": 0.0393,
959
- "step": 1390
960
- },
961
- {
962
- "epoch": 5.49,
963
- "learning_rate": 4.491707317073172e-05,
964
- "loss": 0.0386,
965
- "step": 1400
966
- },
967
- {
968
- "epoch": 5.49,
969
- "eval_accuracy": 0.8095,
970
- "eval_loss": 0.782217264175415,
971
- "eval_runtime": 40.3652,
972
- "eval_samples_per_second": 49.548,
973
- "eval_steps_per_second": 1.561,
974
- "step": 1400
975
- },
976
- {
977
- "epoch": 5.53,
978
- "learning_rate": 4.452682926829269e-05,
979
- "loss": 0.0222,
980
- "step": 1410
981
- },
982
- {
983
- "epoch": 5.57,
984
- "learning_rate": 4.413658536585367e-05,
985
- "loss": 0.041,
986
- "step": 1420
987
- },
988
- {
989
- "epoch": 5.61,
990
- "learning_rate": 4.374634146341464e-05,
991
- "loss": 0.0188,
992
- "step": 1430
993
- },
994
- {
995
- "epoch": 5.65,
996
- "learning_rate": 4.335609756097562e-05,
997
- "loss": 0.032,
998
- "step": 1440
999
- },
1000
- {
1001
- "epoch": 5.69,
1002
- "learning_rate": 4.296585365853659e-05,
1003
- "loss": 0.0159,
1004
- "step": 1450
1005
- },
1006
- {
1007
- "epoch": 5.73,
1008
- "learning_rate": 4.257560975609757e-05,
1009
- "loss": 0.0433,
1010
- "step": 1460
1011
- },
1012
- {
1013
- "epoch": 5.76,
1014
- "learning_rate": 4.218536585365854e-05,
1015
- "loss": 0.0174,
1016
- "step": 1470
1017
- },
1018
- {
1019
- "epoch": 5.8,
1020
- "learning_rate": 4.179512195121952e-05,
1021
- "loss": 0.0307,
1022
- "step": 1480
1023
- },
1024
- {
1025
- "epoch": 5.84,
1026
- "learning_rate": 4.140487804878049e-05,
1027
- "loss": 0.0221,
1028
- "step": 1490
1029
- },
1030
- {
1031
- "epoch": 5.88,
1032
- "learning_rate": 4.101463414634147e-05,
1033
- "loss": 0.0304,
1034
- "step": 1500
1035
- },
1036
- {
1037
- "epoch": 5.88,
1038
- "eval_accuracy": 0.8025,
1039
- "eval_loss": 0.7874125242233276,
1040
- "eval_runtime": 40.5243,
1041
- "eval_samples_per_second": 49.353,
1042
- "eval_steps_per_second": 1.555,
1043
- "step": 1500
1044
- },
1045
- {
1046
- "epoch": 5.92,
1047
- "learning_rate": 4.062439024390244e-05,
1048
- "loss": 0.0429,
1049
- "step": 1510
1050
- },
1051
- {
1052
- "epoch": 5.96,
1053
- "learning_rate": 4.023414634146342e-05,
1054
- "loss": 0.0244,
1055
- "step": 1520
1056
- },
1057
- {
1058
- "epoch": 6.0,
1059
- "learning_rate": 3.984390243902439e-05,
1060
- "loss": 0.0368,
1061
- "step": 1530
1062
- },
1063
- {
1064
- "epoch": 6.04,
1065
- "learning_rate": 3.945365853658537e-05,
1066
- "loss": 0.0104,
1067
- "step": 1540
1068
- },
1069
- {
1070
- "epoch": 6.08,
1071
- "learning_rate": 3.906341463414634e-05,
1072
- "loss": 0.0105,
1073
- "step": 1550
1074
- },
1075
- {
1076
- "epoch": 6.12,
1077
- "learning_rate": 3.8673170731707317e-05,
1078
- "loss": 0.0164,
1079
- "step": 1560
1080
- },
1081
- {
1082
- "epoch": 6.16,
1083
- "learning_rate": 3.828292682926829e-05,
1084
- "loss": 0.009,
1085
- "step": 1570
1086
- },
1087
- {
1088
- "epoch": 6.2,
1089
- "learning_rate": 3.7892682926829266e-05,
1090
- "loss": 0.0101,
1091
- "step": 1580
1092
- },
1093
- {
1094
- "epoch": 6.24,
1095
- "learning_rate": 3.750243902439025e-05,
1096
- "loss": 0.0184,
1097
- "step": 1590
1098
- },
1099
- {
1100
- "epoch": 6.27,
1101
- "learning_rate": 3.711219512195122e-05,
1102
- "loss": 0.0163,
1103
- "step": 1600
1104
- },
1105
- {
1106
- "epoch": 6.27,
1107
- "eval_accuracy": 0.816,
1108
- "eval_loss": 0.7488501071929932,
1109
- "eval_runtime": 40.3879,
1110
- "eval_samples_per_second": 49.52,
1111
- "eval_steps_per_second": 1.56,
1112
- "step": 1600
1113
- },
1114
- {
1115
- "epoch": 6.31,
1116
- "learning_rate": 3.67219512195122e-05,
1117
- "loss": 0.0118,
1118
- "step": 1610
1119
- },
1120
- {
1121
- "epoch": 6.35,
1122
- "learning_rate": 3.633170731707317e-05,
1123
- "loss": 0.0082,
1124
- "step": 1620
1125
- },
1126
- {
1127
- "epoch": 6.39,
1128
- "learning_rate": 3.594146341463415e-05,
1129
- "loss": 0.011,
1130
- "step": 1630
1131
- },
1132
- {
1133
- "epoch": 6.43,
1134
- "learning_rate": 3.555121951219512e-05,
1135
- "loss": 0.0088,
1136
- "step": 1640
1137
- },
1138
- {
1139
- "epoch": 6.47,
1140
- "learning_rate": 3.51609756097561e-05,
1141
- "loss": 0.0403,
1142
- "step": 1650
1143
- },
1144
- {
1145
- "epoch": 6.51,
1146
- "learning_rate": 3.477073170731708e-05,
1147
- "loss": 0.0089,
1148
- "step": 1660
1149
- },
1150
- {
1151
- "epoch": 6.55,
1152
- "learning_rate": 3.4380487804878054e-05,
1153
- "loss": 0.0203,
1154
- "step": 1670
1155
- },
1156
- {
1157
- "epoch": 6.59,
1158
- "learning_rate": 3.399024390243903e-05,
1159
- "loss": 0.0157,
1160
- "step": 1680
1161
- },
1162
- {
1163
- "epoch": 6.63,
1164
- "learning_rate": 3.3600000000000004e-05,
1165
- "loss": 0.0145,
1166
- "step": 1690
1167
- },
1168
- {
1169
- "epoch": 6.67,
1170
- "learning_rate": 3.320975609756098e-05,
1171
- "loss": 0.0171,
1172
- "step": 1700
1173
- },
1174
- {
1175
- "epoch": 6.67,
1176
- "eval_accuracy": 0.816,
1177
- "eval_loss": 0.7466872930526733,
1178
- "eval_runtime": 40.0569,
1179
- "eval_samples_per_second": 49.929,
1180
- "eval_steps_per_second": 1.573,
1181
- "step": 1700
1182
- },
1183
- {
1184
- "epoch": 6.71,
1185
- "learning_rate": 3.281951219512195e-05,
1186
- "loss": 0.0115,
1187
- "step": 1710
1188
- },
1189
- {
1190
- "epoch": 6.75,
1191
- "learning_rate": 3.2429268292682935e-05,
1192
- "loss": 0.0207,
1193
- "step": 1720
1194
- },
1195
- {
1196
- "epoch": 6.78,
1197
- "learning_rate": 3.203902439024391e-05,
1198
- "loss": 0.038,
1199
- "step": 1730
1200
- },
1201
- {
1202
- "epoch": 6.82,
1203
- "learning_rate": 3.1648780487804885e-05,
1204
- "loss": 0.0289,
1205
- "step": 1740
1206
- },
1207
- {
1208
- "epoch": 6.86,
1209
- "learning_rate": 3.125853658536586e-05,
1210
- "loss": 0.0261,
1211
- "step": 1750
1212
- },
1213
- {
1214
- "epoch": 6.9,
1215
- "learning_rate": 3.0868292682926835e-05,
1216
- "loss": 0.0191,
1217
- "step": 1760
1218
- },
1219
- {
1220
- "epoch": 6.94,
1221
- "learning_rate": 3.047804878048781e-05,
1222
- "loss": 0.008,
1223
- "step": 1770
1224
- },
1225
- {
1226
- "epoch": 6.98,
1227
- "learning_rate": 3.0087804878048784e-05,
1228
- "loss": 0.0076,
1229
- "step": 1780
1230
- },
1231
- {
1232
- "epoch": 7.02,
1233
- "learning_rate": 2.969756097560976e-05,
1234
- "loss": 0.0085,
1235
- "step": 1790
1236
- },
1237
- {
1238
- "epoch": 7.06,
1239
- "learning_rate": 2.9307317073170734e-05,
1240
- "loss": 0.0068,
1241
- "step": 1800
1242
- },
1243
- {
1244
- "epoch": 7.06,
1245
- "eval_accuracy": 0.8155,
1246
- "eval_loss": 0.7402506470680237,
1247
- "eval_runtime": 40.0633,
1248
- "eval_samples_per_second": 49.921,
1249
- "eval_steps_per_second": 1.573,
1250
- "step": 1800
1251
- },
1252
- {
1253
- "epoch": 7.1,
1254
- "learning_rate": 2.891707317073171e-05,
1255
- "loss": 0.0132,
1256
- "step": 1810
1257
- },
1258
- {
1259
- "epoch": 7.14,
1260
- "learning_rate": 2.8526829268292684e-05,
1261
- "loss": 0.0136,
1262
- "step": 1820
1263
- },
1264
- {
1265
- "epoch": 7.18,
1266
- "learning_rate": 2.813658536585366e-05,
1267
- "loss": 0.0107,
1268
- "step": 1830
1269
- },
1270
- {
1271
- "epoch": 7.22,
1272
- "learning_rate": 2.7746341463414634e-05,
1273
- "loss": 0.0069,
1274
- "step": 1840
1275
- },
1276
- {
1277
- "epoch": 7.25,
1278
- "learning_rate": 2.7356097560975615e-05,
1279
- "loss": 0.0152,
1280
- "step": 1850
1281
- },
1282
- {
1283
- "epoch": 7.29,
1284
- "learning_rate": 2.696585365853659e-05,
1285
- "loss": 0.0135,
1286
- "step": 1860
1287
- },
1288
- {
1289
- "epoch": 7.33,
1290
- "learning_rate": 2.6575609756097565e-05,
1291
- "loss": 0.0134,
1292
- "step": 1870
1293
- },
1294
- {
1295
- "epoch": 7.37,
1296
- "learning_rate": 2.618536585365854e-05,
1297
- "loss": 0.0174,
1298
- "step": 1880
1299
- },
1300
- {
1301
- "epoch": 7.41,
1302
- "learning_rate": 2.5795121951219515e-05,
1303
- "loss": 0.0062,
1304
- "step": 1890
1305
- },
1306
- {
1307
- "epoch": 7.45,
1308
- "learning_rate": 2.540487804878049e-05,
1309
- "loss": 0.0064,
1310
- "step": 1900
1311
- },
1312
- {
1313
- "epoch": 7.45,
1314
- "eval_accuracy": 0.8225,
1315
- "eval_loss": 0.7162501215934753,
1316
- "eval_runtime": 40.5713,
1317
- "eval_samples_per_second": 49.296,
1318
- "eval_steps_per_second": 1.553,
1319
- "step": 1900
1320
- },
1321
- {
1322
- "epoch": 7.49,
1323
- "learning_rate": 2.5014634146341465e-05,
1324
- "loss": 0.0058,
1325
- "step": 1910
1326
- },
1327
- {
1328
- "epoch": 7.53,
1329
- "learning_rate": 2.462439024390244e-05,
1330
- "loss": 0.0056,
1331
- "step": 1920
1332
- },
1333
- {
1334
- "epoch": 7.57,
1335
- "learning_rate": 2.4234146341463414e-05,
1336
- "loss": 0.006,
1337
- "step": 1930
1338
- },
1339
- {
1340
- "epoch": 7.61,
1341
- "learning_rate": 2.3843902439024393e-05,
1342
- "loss": 0.0175,
1343
- "step": 1940
1344
- },
1345
- {
1346
- "epoch": 7.65,
1347
- "learning_rate": 2.3453658536585367e-05,
1348
- "loss": 0.0062,
1349
- "step": 1950
1350
- },
1351
- {
1352
- "epoch": 7.69,
1353
- "learning_rate": 2.3063414634146342e-05,
1354
- "loss": 0.0062,
1355
- "step": 1960
1356
- },
1357
- {
1358
- "epoch": 7.73,
1359
- "learning_rate": 2.2673170731707317e-05,
1360
- "loss": 0.0166,
1361
- "step": 1970
1362
- },
1363
- {
1364
- "epoch": 7.76,
1365
- "learning_rate": 2.2282926829268296e-05,
1366
- "loss": 0.0121,
1367
- "step": 1980
1368
- },
1369
- {
1370
- "epoch": 7.8,
1371
- "learning_rate": 2.189268292682927e-05,
1372
- "loss": 0.0119,
1373
- "step": 1990
1374
- },
1375
- {
1376
- "epoch": 7.84,
1377
- "learning_rate": 2.150243902439025e-05,
1378
- "loss": 0.0236,
1379
- "step": 2000
1380
- },
1381
- {
1382
- "epoch": 7.84,
1383
- "eval_accuracy": 0.824,
1384
- "eval_loss": 0.7153184413909912,
1385
- "eval_runtime": 40.1832,
1386
- "eval_samples_per_second": 49.772,
1387
- "eval_steps_per_second": 1.568,
1388
- "step": 2000
1389
- },
1390
- {
1391
- "epoch": 7.88,
1392
- "learning_rate": 2.1112195121951224e-05,
1393
- "loss": 0.0141,
1394
- "step": 2010
1395
- },
1396
- {
1397
- "epoch": 7.92,
1398
- "learning_rate": 2.07219512195122e-05,
1399
- "loss": 0.016,
1400
- "step": 2020
1401
- },
1402
- {
1403
- "epoch": 7.96,
1404
- "learning_rate": 2.0331707317073173e-05,
1405
- "loss": 0.0173,
1406
- "step": 2030
1407
- },
1408
- {
1409
- "epoch": 8.0,
1410
- "learning_rate": 1.9941463414634148e-05,
1411
- "loss": 0.0058,
1412
- "step": 2040
1413
- },
1414
- {
1415
- "epoch": 8.04,
1416
- "learning_rate": 1.9551219512195123e-05,
1417
- "loss": 0.007,
1418
- "step": 2050
1419
- },
1420
- {
1421
- "epoch": 8.08,
1422
- "learning_rate": 1.9160975609756098e-05,
1423
- "loss": 0.007,
1424
- "step": 2060
1425
- },
1426
- {
1427
- "epoch": 8.12,
1428
- "learning_rate": 1.8770731707317073e-05,
1429
- "loss": 0.0052,
1430
- "step": 2070
1431
- },
1432
- {
1433
- "epoch": 8.16,
1434
- "learning_rate": 1.838048780487805e-05,
1435
- "loss": 0.0059,
1436
- "step": 2080
1437
- },
1438
- {
1439
- "epoch": 8.2,
1440
- "learning_rate": 1.7990243902439026e-05,
1441
- "loss": 0.0051,
1442
- "step": 2090
1443
- },
1444
- {
1445
- "epoch": 8.24,
1446
- "learning_rate": 1.76e-05,
1447
- "loss": 0.0075,
1448
- "step": 2100
1449
- },
1450
- {
1451
- "epoch": 8.24,
1452
- "eval_accuracy": 0.8255,
1453
- "eval_loss": 0.7127741575241089,
1454
- "eval_runtime": 40.1888,
1455
- "eval_samples_per_second": 49.765,
1456
- "eval_steps_per_second": 1.568,
1457
- "step": 2100
1458
- },
1459
- {
1460
- "epoch": 8.27,
1461
- "learning_rate": 1.7209756097560976e-05,
1462
- "loss": 0.0049,
1463
- "step": 2110
1464
- },
1465
- {
1466
- "epoch": 8.31,
1467
- "learning_rate": 1.681951219512195e-05,
1468
- "loss": 0.0087,
1469
- "step": 2120
1470
- },
1471
- {
1472
- "epoch": 8.35,
1473
- "learning_rate": 1.6429268292682926e-05,
1474
- "loss": 0.0065,
1475
- "step": 2130
1476
- },
1477
- {
1478
- "epoch": 8.39,
1479
- "learning_rate": 1.6039024390243904e-05,
1480
- "loss": 0.0053,
1481
- "step": 2140
1482
- },
1483
- {
1484
- "epoch": 8.43,
1485
- "learning_rate": 1.564878048780488e-05,
1486
- "loss": 0.0133,
1487
- "step": 2150
1488
- },
1489
- {
1490
- "epoch": 8.47,
1491
- "learning_rate": 1.5258536585365855e-05,
1492
- "loss": 0.0186,
1493
- "step": 2160
1494
- },
1495
- {
1496
- "epoch": 8.51,
1497
- "learning_rate": 1.486829268292683e-05,
1498
- "loss": 0.0178,
1499
- "step": 2170
1500
- },
1501
- {
1502
- "epoch": 8.55,
1503
- "learning_rate": 1.4478048780487805e-05,
1504
- "loss": 0.0051,
1505
- "step": 2180
1506
- },
1507
- {
1508
- "epoch": 8.59,
1509
- "learning_rate": 1.4087804878048782e-05,
1510
- "loss": 0.0051,
1511
- "step": 2190
1512
- },
1513
- {
1514
- "epoch": 8.63,
1515
- "learning_rate": 1.3697560975609758e-05,
1516
- "loss": 0.0154,
1517
- "step": 2200
1518
- },
1519
- {
1520
- "epoch": 8.63,
1521
- "eval_accuracy": 0.8235,
1522
- "eval_loss": 0.7104279398918152,
1523
- "eval_runtime": 39.6742,
1524
- "eval_samples_per_second": 50.411,
1525
- "eval_steps_per_second": 1.588,
1526
- "step": 2200
1527
- },
1528
- {
1529
- "epoch": 8.67,
1530
- "learning_rate": 1.3307317073170733e-05,
1531
- "loss": 0.0051,
1532
- "step": 2210
1533
- },
1534
- {
1535
- "epoch": 8.71,
1536
- "learning_rate": 1.2917073170731708e-05,
1537
- "loss": 0.0097,
1538
- "step": 2220
1539
- },
1540
- {
1541
- "epoch": 8.75,
1542
- "learning_rate": 1.2526829268292685e-05,
1543
- "loss": 0.0213,
1544
- "step": 2230
1545
- },
1546
- {
1547
- "epoch": 8.78,
1548
- "learning_rate": 1.213658536585366e-05,
1549
- "loss": 0.0091,
1550
- "step": 2240
1551
- },
1552
- {
1553
- "epoch": 8.82,
1554
- "learning_rate": 1.1746341463414634e-05,
1555
- "loss": 0.0087,
1556
- "step": 2250
1557
- },
1558
- {
1559
- "epoch": 8.86,
1560
- "learning_rate": 1.135609756097561e-05,
1561
- "loss": 0.005,
1562
- "step": 2260
1563
- },
1564
- {
1565
- "epoch": 8.9,
1566
- "learning_rate": 1.0965853658536587e-05,
1567
- "loss": 0.0119,
1568
- "step": 2270
1569
- },
1570
- {
1571
- "epoch": 8.94,
1572
- "learning_rate": 1.0575609756097562e-05,
1573
- "loss": 0.0049,
1574
- "step": 2280
1575
- },
1576
- {
1577
- "epoch": 8.98,
1578
- "learning_rate": 1.0185365853658537e-05,
1579
- "loss": 0.0152,
1580
- "step": 2290
1581
- },
1582
- {
1583
- "epoch": 9.02,
1584
- "learning_rate": 9.795121951219512e-06,
1585
- "loss": 0.0082,
1586
- "step": 2300
1587
- },
1588
- {
1589
- "epoch": 9.02,
1590
- "eval_accuracy": 0.824,
1591
- "eval_loss": 0.7048487067222595,
1592
- "eval_runtime": 39.8204,
1593
- "eval_samples_per_second": 50.226,
1594
- "eval_steps_per_second": 1.582,
1595
- "step": 2300
1596
- },
1597
- {
1598
- "epoch": 9.06,
1599
- "learning_rate": 9.404878048780489e-06,
1600
- "loss": 0.0046,
1601
- "step": 2310
1602
- },
1603
- {
1604
- "epoch": 9.1,
1605
- "learning_rate": 9.014634146341464e-06,
1606
- "loss": 0.0076,
1607
- "step": 2320
1608
- },
1609
- {
1610
- "epoch": 9.14,
1611
- "learning_rate": 8.62439024390244e-06,
1612
- "loss": 0.0107,
1613
- "step": 2330
1614
- },
1615
- {
1616
- "epoch": 9.18,
1617
- "learning_rate": 8.234146341463415e-06,
1618
- "loss": 0.0182,
1619
- "step": 2340
1620
- },
1621
- {
1622
- "epoch": 9.22,
1623
- "learning_rate": 7.843902439024392e-06,
1624
- "loss": 0.0045,
1625
- "step": 2350
1626
- },
1627
- {
1628
- "epoch": 9.25,
1629
- "learning_rate": 7.4536585365853665e-06,
1630
- "loss": 0.0047,
1631
- "step": 2360
1632
- },
1633
- {
1634
- "epoch": 9.29,
1635
- "learning_rate": 7.063414634146341e-06,
1636
- "loss": 0.0047,
1637
- "step": 2370
1638
- },
1639
- {
1640
- "epoch": 9.33,
1641
- "learning_rate": 6.673170731707318e-06,
1642
- "loss": 0.0045,
1643
- "step": 2380
1644
- },
1645
- {
1646
- "epoch": 9.37,
1647
- "learning_rate": 6.282926829268293e-06,
1648
- "loss": 0.0162,
1649
- "step": 2390
1650
- },
1651
- {
1652
- "epoch": 9.41,
1653
- "learning_rate": 5.892682926829269e-06,
1654
- "loss": 0.007,
1655
- "step": 2400
1656
- },
1657
- {
1658
- "epoch": 9.41,
1659
- "eval_accuracy": 0.823,
1660
- "eval_loss": 0.7026473879814148,
1661
- "eval_runtime": 40.761,
1662
- "eval_samples_per_second": 49.067,
1663
- "eval_steps_per_second": 1.546,
1664
- "step": 2400
1665
- },
1666
- {
1667
- "epoch": 9.45,
1668
- "learning_rate": 5.502439024390245e-06,
1669
- "loss": 0.0074,
1670
- "step": 2410
1671
- },
1672
- {
1673
- "epoch": 9.49,
1674
- "learning_rate": 5.11219512195122e-06,
1675
- "loss": 0.0045,
1676
- "step": 2420
1677
- },
1678
- {
1679
- "epoch": 9.53,
1680
- "learning_rate": 4.721951219512195e-06,
1681
- "loss": 0.0044,
1682
- "step": 2430
1683
- },
1684
- {
1685
- "epoch": 9.57,
1686
- "learning_rate": 4.331707317073171e-06,
1687
- "loss": 0.0046,
1688
- "step": 2440
1689
- },
1690
- {
1691
- "epoch": 9.61,
1692
- "learning_rate": 3.941463414634147e-06,
1693
- "loss": 0.0066,
1694
- "step": 2450
1695
- },
1696
- {
1697
- "epoch": 9.65,
1698
- "learning_rate": 3.551219512195122e-06,
1699
- "loss": 0.009,
1700
- "step": 2460
1701
- },
1702
- {
1703
- "epoch": 9.69,
1704
- "learning_rate": 3.160975609756098e-06,
1705
- "loss": 0.0045,
1706
- "step": 2470
1707
- },
1708
- {
1709
- "epoch": 9.73,
1710
- "learning_rate": 2.7707317073170736e-06,
1711
- "loss": 0.0046,
1712
- "step": 2480
1713
- },
1714
- {
1715
- "epoch": 9.76,
1716
- "learning_rate": 2.380487804878049e-06,
1717
- "loss": 0.0147,
1718
- "step": 2490
1719
- },
1720
- {
1721
- "epoch": 9.8,
1722
- "learning_rate": 1.9902439024390246e-06,
1723
- "loss": 0.0045,
1724
- "step": 2500
1725
- },
1726
- {
1727
- "epoch": 9.8,
1728
- "eval_accuracy": 0.8235,
1729
- "eval_loss": 0.7031954526901245,
1730
- "eval_runtime": 39.643,
1731
- "eval_samples_per_second": 50.45,
1732
- "eval_steps_per_second": 1.589,
1733
- "step": 2500
1734
- }
1735
- ],
1736
- "max_steps": 2550,
1737
- "num_train_epochs": 10,
1738
- "total_flos": 6.198960121799639e+18,
1739
- "trial_name": null,
1740
- "trial_params": null
1741
- }