Rookiezz commited on
Commit
e59e56b
·
verified ·
1 Parent(s): 8c39798

Upload 4 files

Browse files
Files changed (3) hide show
  1. rng_state.pth +2 -2
  2. scheduler.pt +1 -1
  3. trainer_state.json +2062 -391
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4292d1113b4a8d3acc9212b18e9fa8b8bd873c8a52928d290e0302ec7df29163
3
- size 14575
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8389f21e71ce92f43a547972b89c1c26b0f480b5298694cb63fb2754cff0298a
3
+ size 14511
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:eb75cd671247b41bbd34848e4df7927814e754c9cb28ec0aa9edae59ac06cf69
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:267eadca69671bc0460ea479ba8c13a4276356a16351a73e44520e14439a1b48
3
  size 627
trainer_state.json CHANGED
@@ -1,763 +1,2434 @@
1
  {
2
- "best_metric": 349.6192030463756,
3
- "best_model_checkpoint": "./whisper-small-th/checkpoint-1000",
4
- "epoch": 2.2440917272493515,
5
- "global_step": 3000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
- "epoch": 0.02,
12
- "learning_rate": 5.000000000000001e-07,
13
- "loss": 1.4088,
14
  "step": 25
15
  },
16
  {
17
- "epoch": 0.04,
18
- "learning_rate": 1.0000000000000002e-06,
19
- "loss": 1.1763,
20
  "step": 50
21
  },
22
  {
23
- "epoch": 0.06,
24
- "learning_rate": 1.5e-06,
25
- "loss": 0.7903,
26
  "step": 75
27
  },
28
  {
29
- "epoch": 0.07,
30
- "learning_rate": 2.0000000000000003e-06,
31
- "loss": 0.5779,
32
  "step": 100
33
  },
34
  {
35
- "epoch": 0.09,
36
- "learning_rate": 2.5e-06,
37
- "loss": 0.5041,
38
  "step": 125
39
  },
40
  {
41
- "epoch": 0.11,
42
- "learning_rate": 3e-06,
43
- "loss": 0.4802,
44
  "step": 150
45
  },
46
  {
47
- "epoch": 0.13,
48
- "learning_rate": 3.48e-06,
49
- "loss": 0.4499,
50
  "step": 175
51
  },
52
  {
53
- "epoch": 0.15,
54
- "learning_rate": 3.980000000000001e-06,
55
- "loss": 0.447,
56
  "step": 200
57
  },
58
  {
59
- "epoch": 0.17,
60
- "learning_rate": 4.48e-06,
61
- "loss": 0.4707,
62
  "step": 225
63
  },
64
  {
65
- "epoch": 0.19,
66
- "learning_rate": 4.980000000000001e-06,
67
- "loss": 0.4222,
68
  "step": 250
69
  },
70
  {
71
- "epoch": 0.21,
72
- "learning_rate": 5.480000000000001e-06,
73
- "loss": 0.4262,
74
  "step": 275
75
  },
76
  {
77
- "epoch": 0.22,
78
- "learning_rate": 5.98e-06,
79
- "loss": 0.3833,
80
  "step": 300
81
  },
82
  {
83
- "epoch": 0.24,
84
- "learning_rate": 6.480000000000001e-06,
85
- "loss": 0.3818,
86
  "step": 325
87
  },
88
  {
89
- "epoch": 0.26,
90
- "learning_rate": 6.98e-06,
91
- "loss": 0.3956,
92
  "step": 350
93
  },
94
  {
95
- "epoch": 0.28,
96
- "learning_rate": 7.48e-06,
97
- "loss": 0.4027,
98
  "step": 375
99
  },
100
  {
101
- "epoch": 0.3,
102
- "learning_rate": 7.980000000000002e-06,
103
- "loss": 0.3553,
104
  "step": 400
105
  },
106
  {
107
- "epoch": 0.32,
108
- "learning_rate": 8.48e-06,
109
- "loss": 0.36,
110
  "step": 425
111
  },
112
  {
113
- "epoch": 0.34,
114
- "learning_rate": 8.98e-06,
115
- "loss": 0.346,
116
  "step": 450
117
  },
118
  {
119
- "epoch": 0.36,
120
- "learning_rate": 9.48e-06,
121
- "loss": 0.3413,
122
  "step": 475
123
  },
124
  {
125
- "epoch": 0.37,
126
- "learning_rate": 9.980000000000001e-06,
127
- "loss": 0.3354,
128
  "step": 500
129
  },
130
  {
131
- "epoch": 0.39,
132
- "learning_rate": 9.946666666666667e-06,
133
- "loss": 0.3542,
134
  "step": 525
135
  },
136
  {
137
- "epoch": 0.41,
138
- "learning_rate": 9.891111111111113e-06,
139
- "loss": 0.3199,
140
  "step": 550
141
  },
142
  {
143
- "epoch": 0.43,
144
- "learning_rate": 9.835555555555556e-06,
145
- "loss": 0.3121,
146
  "step": 575
147
  },
148
  {
149
- "epoch": 0.45,
150
- "learning_rate": 9.780000000000001e-06,
151
- "loss": 0.3131,
152
  "step": 600
153
  },
154
  {
155
- "epoch": 0.47,
156
- "learning_rate": 9.724444444444445e-06,
157
- "loss": 0.3073,
158
  "step": 625
159
  },
160
  {
161
- "epoch": 0.49,
162
- "learning_rate": 9.66888888888889e-06,
163
- "loss": 0.312,
164
  "step": 650
165
  },
166
  {
167
- "epoch": 0.5,
168
- "learning_rate": 9.613333333333335e-06,
169
- "loss": 0.3004,
170
  "step": 675
171
  },
172
  {
173
- "epoch": 0.52,
174
- "learning_rate": 9.557777777777777e-06,
175
- "loss": 0.2856,
176
  "step": 700
177
  },
178
  {
179
- "epoch": 0.54,
180
- "learning_rate": 9.502222222222223e-06,
181
- "loss": 0.2822,
182
  "step": 725
183
  },
184
  {
185
- "epoch": 0.56,
186
- "learning_rate": 9.446666666666667e-06,
187
- "loss": 0.2953,
188
  "step": 750
189
  },
190
  {
191
- "epoch": 0.58,
192
- "learning_rate": 9.391111111111111e-06,
193
- "loss": 0.2755,
194
  "step": 775
195
  },
196
  {
197
- "epoch": 0.6,
198
- "learning_rate": 9.335555555555557e-06,
199
- "loss": 0.2797,
200
  "step": 800
201
  },
202
  {
203
- "epoch": 0.62,
204
- "learning_rate": 9.280000000000001e-06,
205
- "loss": 0.2648,
206
  "step": 825
207
  },
208
  {
209
- "epoch": 0.64,
210
- "learning_rate": 9.224444444444445e-06,
211
- "loss": 0.2957,
212
  "step": 850
213
  },
214
  {
215
- "epoch": 0.65,
216
- "learning_rate": 9.168888888888889e-06,
217
- "loss": 0.2802,
218
  "step": 875
219
  },
220
  {
221
- "epoch": 0.67,
222
- "learning_rate": 9.113333333333335e-06,
223
- "loss": 0.2613,
224
  "step": 900
225
  },
226
  {
227
- "epoch": 0.69,
228
- "learning_rate": 9.057777777777779e-06,
229
- "loss": 0.2729,
230
  "step": 925
231
  },
232
  {
233
- "epoch": 0.71,
234
- "learning_rate": 9.002222222222223e-06,
235
- "loss": 0.2506,
236
  "step": 950
237
  },
238
  {
239
- "epoch": 0.73,
240
- "learning_rate": 8.946666666666669e-06,
241
- "loss": 0.2542,
242
  "step": 975
243
  },
244
  {
245
- "epoch": 0.75,
246
- "learning_rate": 8.891111111111111e-06,
247
- "loss": 0.261,
248
- "step": 1000
249
- },
250
- {
251
- "epoch": 0.75,
252
- "eval_loss": 0.22164225578308105,
253
- "eval_runtime": 6851.3771,
254
- "eval_samples_per_second": 1.595,
255
- "eval_steps_per_second": 1.595,
256
- "eval_wer": 349.6192030463756,
257
  "step": 1000
258
  },
259
  {
260
- "epoch": 0.77,
261
- "learning_rate": 8.835555555555557e-06,
262
- "loss": 0.262,
263
  "step": 1025
264
  },
265
  {
266
- "epoch": 0.79,
267
- "learning_rate": 8.78e-06,
268
- "loss": 0.2451,
269
  "step": 1050
270
  },
271
  {
272
- "epoch": 0.8,
273
- "learning_rate": 8.724444444444445e-06,
274
- "loss": 0.2607,
275
  "step": 1075
276
  },
277
  {
278
- "epoch": 0.82,
279
- "learning_rate": 8.66888888888889e-06,
280
- "loss": 0.2613,
281
  "step": 1100
282
  },
283
  {
284
- "epoch": 0.84,
285
- "learning_rate": 8.613333333333333e-06,
286
- "loss": 0.2346,
287
  "step": 1125
288
  },
289
  {
290
- "epoch": 0.86,
291
- "learning_rate": 8.557777777777778e-06,
292
- "loss": 0.2353,
293
  "step": 1150
294
  },
295
  {
296
- "epoch": 0.88,
297
- "learning_rate": 8.502222222222223e-06,
298
- "loss": 0.2456,
299
  "step": 1175
300
  },
301
  {
302
- "epoch": 0.9,
303
- "learning_rate": 8.446666666666668e-06,
304
- "loss": 0.2266,
305
  "step": 1200
306
  },
307
  {
308
- "epoch": 0.92,
309
- "learning_rate": 8.391111111111112e-06,
310
- "loss": 0.2259,
311
  "step": 1225
312
  },
313
  {
314
- "epoch": 0.94,
315
- "learning_rate": 8.335555555555556e-06,
316
- "loss": 0.2314,
317
  "step": 1250
318
  },
319
  {
320
- "epoch": 0.95,
321
- "learning_rate": 8.28e-06,
322
- "loss": 0.2279,
323
  "step": 1275
324
  },
325
  {
326
- "epoch": 0.97,
327
- "learning_rate": 8.224444444444444e-06,
328
- "loss": 0.2484,
329
  "step": 1300
330
  },
331
  {
332
- "epoch": 0.99,
333
- "learning_rate": 8.16888888888889e-06,
334
- "loss": 0.2277,
335
  "step": 1325
336
  },
337
  {
338
- "epoch": 1.01,
339
- "learning_rate": 8.113333333333334e-06,
340
- "loss": 0.2045,
341
  "step": 1350
342
  },
343
  {
344
- "epoch": 1.03,
345
- "learning_rate": 8.057777777777778e-06,
346
- "loss": 0.1655,
347
  "step": 1375
348
  },
349
  {
350
- "epoch": 1.05,
351
- "learning_rate": 8.002222222222222e-06,
352
- "loss": 0.1579,
353
  "step": 1400
354
  },
355
  {
356
- "epoch": 1.07,
357
- "learning_rate": 7.946666666666666e-06,
358
- "loss": 0.1664,
359
  "step": 1425
360
  },
361
  {
362
- "epoch": 1.08,
363
- "learning_rate": 7.891111111111112e-06,
364
- "loss": 0.1616,
365
  "step": 1450
366
  },
367
  {
368
- "epoch": 1.1,
369
- "learning_rate": 7.835555555555556e-06,
370
- "loss": 0.1649,
371
  "step": 1475
372
  },
373
  {
374
- "epoch": 1.12,
375
- "learning_rate": 7.78e-06,
376
- "loss": 0.1629,
377
  "step": 1500
378
  },
379
  {
380
- "epoch": 1.14,
381
- "learning_rate": 7.724444444444446e-06,
382
- "loss": 0.1567,
383
  "step": 1525
384
  },
385
  {
386
- "epoch": 1.16,
387
- "learning_rate": 7.66888888888889e-06,
388
- "loss": 0.1532,
389
  "step": 1550
390
  },
391
  {
392
- "epoch": 1.18,
393
- "learning_rate": 7.613333333333334e-06,
394
- "loss": 0.1574,
395
  "step": 1575
396
  },
397
  {
398
- "epoch": 1.2,
399
- "learning_rate": 7.557777777777779e-06,
400
- "loss": 0.1597,
401
  "step": 1600
402
  },
403
  {
404
- "epoch": 1.22,
405
- "learning_rate": 7.502222222222223e-06,
406
- "loss": 0.1824,
407
  "step": 1625
408
  },
409
  {
410
- "epoch": 1.23,
411
- "learning_rate": 7.446666666666668e-06,
412
- "loss": 0.157,
413
  "step": 1650
414
  },
415
  {
416
- "epoch": 1.25,
417
- "learning_rate": 7.3911111111111125e-06,
418
- "loss": 0.1496,
419
  "step": 1675
420
  },
421
  {
422
- "epoch": 1.27,
423
- "learning_rate": 7.335555555555556e-06,
424
- "loss": 0.1515,
425
  "step": 1700
426
  },
427
  {
428
- "epoch": 1.29,
429
- "learning_rate": 7.280000000000001e-06,
430
- "loss": 0.1485,
431
  "step": 1725
432
  },
433
  {
434
- "epoch": 1.31,
435
- "learning_rate": 7.224444444444445e-06,
436
- "loss": 0.1443,
437
  "step": 1750
438
  },
439
  {
440
- "epoch": 1.33,
441
- "learning_rate": 7.1688888888888895e-06,
442
- "loss": 0.1466,
443
  "step": 1775
444
  },
445
  {
446
- "epoch": 1.35,
447
- "learning_rate": 7.113333333333334e-06,
448
- "loss": 0.1611,
449
  "step": 1800
450
  },
451
  {
452
- "epoch": 1.37,
453
- "learning_rate": 7.057777777777778e-06,
454
- "loss": 0.1537,
455
  "step": 1825
456
  },
457
  {
458
- "epoch": 1.38,
459
- "learning_rate": 7.0022222222222225e-06,
460
- "loss": 0.1515,
461
  "step": 1850
462
  },
463
  {
464
- "epoch": 1.4,
465
- "learning_rate": 6.946666666666667e-06,
466
- "loss": 0.1612,
467
  "step": 1875
468
  },
469
  {
470
- "epoch": 1.42,
471
- "learning_rate": 6.891111111111111e-06,
472
- "loss": 0.1484,
473
  "step": 1900
474
  },
475
  {
476
- "epoch": 1.44,
477
- "learning_rate": 6.835555555555556e-06,
478
- "loss": 0.151,
479
  "step": 1925
480
  },
481
  {
482
- "epoch": 1.46,
483
- "learning_rate": 6.780000000000001e-06,
484
- "loss": 0.1476,
485
  "step": 1950
486
  },
487
  {
488
- "epoch": 1.48,
489
- "learning_rate": 6.724444444444444e-06,
490
- "loss": 0.151,
491
  "step": 1975
492
  },
493
  {
494
- "epoch": 1.5,
495
- "learning_rate": 6.668888888888889e-06,
496
- "loss": 0.1471,
497
- "step": 2000
498
- },
499
- {
500
- "epoch": 1.5,
501
- "eval_loss": 0.18018271028995514,
502
- "eval_runtime": 6949.9088,
503
- "eval_samples_per_second": 1.573,
504
- "eval_steps_per_second": 1.573,
505
- "eval_wer": 364.84428124575004,
506
  "step": 2000
507
  },
508
  {
509
- "epoch": 1.51,
510
- "learning_rate": 6.613333333333334e-06,
511
- "loss": 0.1381,
512
  "step": 2025
513
  },
514
  {
515
- "epoch": 1.53,
516
- "learning_rate": 6.557777777777778e-06,
517
- "loss": 0.1562,
518
  "step": 2050
519
  },
520
  {
521
- "epoch": 1.55,
522
- "learning_rate": 6.502222222222223e-06,
523
- "loss": 0.1565,
524
  "step": 2075
525
  },
526
  {
527
- "epoch": 1.57,
528
- "learning_rate": 6.446666666666668e-06,
529
- "loss": 0.1698,
530
  "step": 2100
531
  },
532
  {
533
- "epoch": 1.59,
534
- "learning_rate": 6.391111111111111e-06,
535
- "loss": 0.1423,
536
  "step": 2125
537
  },
538
  {
539
- "epoch": 1.61,
540
- "learning_rate": 6.335555555555556e-06,
541
- "loss": 0.1409,
542
  "step": 2150
543
  },
544
  {
545
- "epoch": 1.63,
546
- "learning_rate": 6.280000000000001e-06,
547
- "loss": 0.1637,
548
  "step": 2175
549
  },
550
  {
551
- "epoch": 1.65,
552
- "learning_rate": 6.224444444444445e-06,
553
- "loss": 0.1445,
554
  "step": 2200
555
  },
556
  {
557
- "epoch": 1.66,
558
- "learning_rate": 6.16888888888889e-06,
559
- "loss": 0.1404,
560
  "step": 2225
561
  },
562
  {
563
- "epoch": 1.68,
564
- "learning_rate": 6.113333333333333e-06,
565
- "loss": 0.1558,
566
  "step": 2250
567
  },
568
  {
569
- "epoch": 1.7,
570
- "learning_rate": 6.057777777777778e-06,
571
- "loss": 0.1454,
572
  "step": 2275
573
  },
574
  {
575
- "epoch": 1.72,
576
- "learning_rate": 6.002222222222223e-06,
577
- "loss": 0.1409,
578
  "step": 2300
579
  },
580
  {
581
- "epoch": 1.74,
582
- "learning_rate": 5.946666666666668e-06,
583
- "loss": 0.143,
584
  "step": 2325
585
  },
586
  {
587
- "epoch": 1.76,
588
- "learning_rate": 5.891111111111112e-06,
589
- "loss": 0.1403,
590
  "step": 2350
591
  },
592
  {
593
- "epoch": 1.78,
594
- "learning_rate": 5.8355555555555565e-06,
595
- "loss": 0.1523,
596
  "step": 2375
597
  },
598
  {
599
- "epoch": 1.8,
600
- "learning_rate": 5.78e-06,
601
- "loss": 0.1308,
602
  "step": 2400
603
  },
604
  {
605
- "epoch": 1.81,
606
- "learning_rate": 5.724444444444445e-06,
607
- "loss": 0.1508,
608
  "step": 2425
609
  },
610
  {
611
- "epoch": 1.83,
612
- "learning_rate": 5.6688888888888895e-06,
613
- "loss": 0.1478,
614
  "step": 2450
615
  },
616
  {
617
- "epoch": 1.85,
618
- "learning_rate": 5.613333333333334e-06,
619
- "loss": 0.1517,
620
  "step": 2475
621
  },
622
  {
623
- "epoch": 1.87,
624
- "learning_rate": 5.557777777777778e-06,
625
- "loss": 0.136,
626
  "step": 2500
627
  },
628
  {
629
- "epoch": 1.89,
630
- "learning_rate": 5.5022222222222224e-06,
631
- "loss": 0.1361,
632
  "step": 2525
633
  },
634
  {
635
- "epoch": 1.91,
636
- "learning_rate": 5.4466666666666665e-06,
637
- "loss": 0.1419,
638
  "step": 2550
639
  },
640
  {
641
- "epoch": 1.93,
642
- "learning_rate": 5.391111111111111e-06,
643
- "loss": 0.146,
644
  "step": 2575
645
  },
646
  {
647
- "epoch": 1.94,
648
- "learning_rate": 5.335555555555556e-06,
649
- "loss": 0.1456,
650
  "step": 2600
651
  },
652
  {
653
- "epoch": 1.96,
654
- "learning_rate": 5.28e-06,
655
- "loss": 0.1383,
656
  "step": 2625
657
  },
658
  {
659
- "epoch": 1.98,
660
- "learning_rate": 5.224444444444445e-06,
661
- "loss": 0.142,
662
  "step": 2650
663
  },
664
  {
665
- "epoch": 2.0,
666
- "learning_rate": 5.168888888888889e-06,
667
- "loss": 0.1353,
668
  "step": 2675
669
  },
670
  {
671
- "epoch": 2.02,
672
- "learning_rate": 5.113333333333333e-06,
673
- "loss": 0.0836,
674
  "step": 2700
675
  },
676
  {
677
- "epoch": 2.04,
678
- "learning_rate": 5.057777777777778e-06,
679
- "loss": 0.081,
680
  "step": 2725
681
  },
682
  {
683
- "epoch": 2.06,
684
- "learning_rate": 5.002222222222223e-06,
685
- "loss": 0.0914,
686
  "step": 2750
687
  },
688
  {
689
- "epoch": 2.08,
690
- "learning_rate": 4.946666666666667e-06,
691
- "loss": 0.0806,
692
  "step": 2775
693
  },
694
  {
695
- "epoch": 2.09,
696
- "learning_rate": 4.891111111111111e-06,
697
- "loss": 0.083,
698
  "step": 2800
699
  },
700
  {
701
- "epoch": 2.11,
702
- "learning_rate": 4.835555555555556e-06,
703
- "loss": 0.078,
704
  "step": 2825
705
  },
706
  {
707
- "epoch": 2.13,
708
- "learning_rate": 4.78e-06,
709
- "loss": 0.0799,
710
  "step": 2850
711
  },
712
  {
713
- "epoch": 2.15,
714
- "learning_rate": 4.724444444444445e-06,
715
- "loss": 0.0864,
716
  "step": 2875
717
  },
718
  {
719
- "epoch": 2.17,
720
- "learning_rate": 4.66888888888889e-06,
721
- "loss": 0.078,
722
  "step": 2900
723
  },
724
  {
725
- "epoch": 2.19,
726
- "learning_rate": 4.613333333333334e-06,
727
- "loss": 0.0863,
728
  "step": 2925
729
  },
730
  {
731
- "epoch": 2.21,
732
- "learning_rate": 4.557777777777778e-06,
733
- "loss": 0.0794,
734
  "step": 2950
735
  },
736
  {
737
- "epoch": 2.23,
738
- "learning_rate": 4.502222222222223e-06,
739
- "loss": 0.0789,
740
  "step": 2975
741
  },
742
  {
743
- "epoch": 2.24,
744
- "learning_rate": 4.446666666666667e-06,
745
- "loss": 0.093,
746
  "step": 3000
747
  },
748
  {
749
- "epoch": 2.24,
750
- "eval_loss": 0.16714586317539215,
751
- "eval_runtime": 6790.9362,
752
- "eval_samples_per_second": 1.609,
753
- "eval_steps_per_second": 1.609,
754
- "eval_wer": 360.4719162246702,
755
- "step": 3000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
756
  }
757
  ],
758
- "max_steps": 5000,
759
  "num_train_epochs": 4,
760
- "total_flos": 2.770419843072e+19,
761
  "trial_name": null,
762
  "trial_params": null
763
  }
 
1
  {
2
+ "best_metric": 455.00475996192034,
3
+ "best_model_checkpoint": "./whisper-small-th/checkpoint-10000",
4
+ "epoch": 3.7397157816005984,
5
+ "global_step": 10000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
+ "epoch": 0.01,
12
+ "learning_rate": 4.4e-07,
13
+ "loss": 1.2638,
14
  "step": 25
15
  },
16
  {
17
+ "epoch": 0.02,
18
+ "learning_rate": 9.400000000000001e-07,
19
+ "loss": 1.1302,
20
  "step": 50
21
  },
22
  {
23
+ "epoch": 0.03,
24
+ "learning_rate": 1.44e-06,
25
+ "loss": 0.8499,
26
  "step": 75
27
  },
28
  {
29
+ "epoch": 0.04,
30
+ "learning_rate": 1.94e-06,
31
+ "loss": 0.5874,
32
  "step": 100
33
  },
34
  {
35
+ "epoch": 0.05,
36
+ "learning_rate": 2.4400000000000004e-06,
37
+ "loss": 0.5495,
38
  "step": 125
39
  },
40
  {
41
+ "epoch": 0.06,
42
+ "learning_rate": 2.9400000000000002e-06,
43
+ "loss": 0.5288,
44
  "step": 150
45
  },
46
  {
47
+ "epoch": 0.07,
48
+ "learning_rate": 3.44e-06,
49
+ "loss": 0.4926,
50
  "step": 175
51
  },
52
  {
53
+ "epoch": 0.07,
54
+ "learning_rate": 3.94e-06,
55
+ "loss": 0.4848,
56
  "step": 200
57
  },
58
  {
59
+ "epoch": 0.08,
60
+ "learning_rate": 4.440000000000001e-06,
61
+ "loss": 0.4442,
62
  "step": 225
63
  },
64
  {
65
+ "epoch": 0.09,
66
+ "learning_rate": 4.94e-06,
67
+ "loss": 0.447,
68
  "step": 250
69
  },
70
  {
71
+ "epoch": 0.1,
72
+ "learning_rate": 5.4400000000000004e-06,
73
+ "loss": 0.417,
74
  "step": 275
75
  },
76
  {
77
+ "epoch": 0.11,
78
+ "learning_rate": 5.94e-06,
79
+ "loss": 0.4122,
80
  "step": 300
81
  },
82
  {
83
+ "epoch": 0.12,
84
+ "learning_rate": 6.440000000000001e-06,
85
+ "loss": 0.3987,
86
  "step": 325
87
  },
88
  {
89
+ "epoch": 0.13,
90
+ "learning_rate": 6.9400000000000005e-06,
91
+ "loss": 0.3986,
92
  "step": 350
93
  },
94
  {
95
+ "epoch": 0.14,
96
+ "learning_rate": 7.440000000000001e-06,
97
+ "loss": 0.4054,
98
  "step": 375
99
  },
100
  {
101
+ "epoch": 0.15,
102
+ "learning_rate": 7.94e-06,
103
+ "loss": 0.3771,
104
  "step": 400
105
  },
106
  {
107
+ "epoch": 0.16,
108
+ "learning_rate": 8.44e-06,
109
+ "loss": 0.4053,
110
  "step": 425
111
  },
112
  {
113
+ "epoch": 0.17,
114
+ "learning_rate": 8.94e-06,
115
+ "loss": 0.3854,
116
  "step": 450
117
  },
118
  {
119
+ "epoch": 0.18,
120
+ "learning_rate": 9.440000000000001e-06,
121
+ "loss": 0.3699,
122
  "step": 475
123
  },
124
  {
125
+ "epoch": 0.19,
126
+ "learning_rate": 9.940000000000001e-06,
127
+ "loss": 0.355,
128
  "step": 500
129
  },
130
  {
131
+ "epoch": 0.2,
132
+ "learning_rate": 9.976842105263158e-06,
133
+ "loss": 0.3734,
134
  "step": 525
135
  },
136
  {
137
+ "epoch": 0.21,
138
+ "learning_rate": 9.950526315789475e-06,
139
+ "loss": 0.3522,
140
  "step": 550
141
  },
142
  {
143
+ "epoch": 0.22,
144
+ "learning_rate": 9.92421052631579e-06,
145
+ "loss": 0.3296,
146
  "step": 575
147
  },
148
  {
149
+ "epoch": 0.22,
150
+ "learning_rate": 9.897894736842107e-06,
151
+ "loss": 0.3258,
152
  "step": 600
153
  },
154
  {
155
+ "epoch": 0.23,
156
+ "learning_rate": 9.871578947368422e-06,
157
+ "loss": 0.3292,
158
  "step": 625
159
  },
160
  {
161
+ "epoch": 0.24,
162
+ "learning_rate": 9.845263157894738e-06,
163
+ "loss": 0.3251,
164
  "step": 650
165
  },
166
  {
167
+ "epoch": 0.25,
168
+ "learning_rate": 9.818947368421053e-06,
169
+ "loss": 0.3431,
170
  "step": 675
171
  },
172
  {
173
+ "epoch": 0.26,
174
+ "learning_rate": 9.79263157894737e-06,
175
+ "loss": 0.3267,
176
  "step": 700
177
  },
178
  {
179
+ "epoch": 0.27,
180
+ "learning_rate": 9.766315789473685e-06,
181
+ "loss": 0.336,
182
  "step": 725
183
  },
184
  {
185
+ "epoch": 0.28,
186
+ "learning_rate": 9.74e-06,
187
+ "loss": 0.3626,
188
  "step": 750
189
  },
190
  {
191
+ "epoch": 0.29,
192
+ "learning_rate": 9.713684210526317e-06,
193
+ "loss": 0.3152,
194
  "step": 775
195
  },
196
  {
197
+ "epoch": 0.3,
198
+ "learning_rate": 9.687368421052632e-06,
199
+ "loss": 0.2871,
200
  "step": 800
201
  },
202
  {
203
+ "epoch": 0.31,
204
+ "learning_rate": 9.661052631578948e-06,
205
+ "loss": 0.3152,
206
  "step": 825
207
  },
208
  {
209
+ "epoch": 0.32,
210
+ "learning_rate": 9.634736842105265e-06,
211
+ "loss": 0.3014,
212
  "step": 850
213
  },
214
  {
215
+ "epoch": 0.33,
216
+ "learning_rate": 9.60842105263158e-06,
217
+ "loss": 0.2967,
218
  "step": 875
219
  },
220
  {
221
+ "epoch": 0.34,
222
+ "learning_rate": 9.582105263157897e-06,
223
+ "loss": 0.3026,
224
  "step": 900
225
  },
226
  {
227
+ "epoch": 0.35,
228
+ "learning_rate": 9.555789473684211e-06,
229
+ "loss": 0.2893,
230
  "step": 925
231
  },
232
  {
233
+ "epoch": 0.36,
234
+ "learning_rate": 9.529473684210528e-06,
235
+ "loss": 0.2983,
236
  "step": 950
237
  },
238
  {
239
+ "epoch": 0.36,
240
+ "learning_rate": 9.503157894736843e-06,
241
+ "loss": 0.2903,
242
  "step": 975
243
  },
244
  {
245
+ "epoch": 0.37,
246
+ "learning_rate": 9.476842105263158e-06,
247
+ "loss": 0.2734,
 
 
 
 
 
 
 
 
 
248
  "step": 1000
249
  },
250
  {
251
+ "epoch": 0.38,
252
+ "learning_rate": 9.450526315789475e-06,
253
+ "loss": 0.2947,
254
  "step": 1025
255
  },
256
  {
257
+ "epoch": 0.39,
258
+ "learning_rate": 9.42421052631579e-06,
259
+ "loss": 0.2876,
260
  "step": 1050
261
  },
262
  {
263
+ "epoch": 0.4,
264
+ "learning_rate": 9.397894736842106e-06,
265
+ "loss": 0.275,
266
  "step": 1075
267
  },
268
  {
269
+ "epoch": 0.41,
270
+ "learning_rate": 9.371578947368421e-06,
271
+ "loss": 0.2818,
272
  "step": 1100
273
  },
274
  {
275
+ "epoch": 0.42,
276
+ "learning_rate": 9.345263157894738e-06,
277
+ "loss": 0.2714,
278
  "step": 1125
279
  },
280
  {
281
+ "epoch": 0.43,
282
+ "learning_rate": 9.318947368421053e-06,
283
+ "loss": 0.2648,
284
  "step": 1150
285
  },
286
  {
287
+ "epoch": 0.44,
288
+ "learning_rate": 9.292631578947368e-06,
289
+ "loss": 0.2845,
290
  "step": 1175
291
  },
292
  {
293
+ "epoch": 0.45,
294
+ "learning_rate": 9.266315789473685e-06,
295
+ "loss": 0.2606,
296
  "step": 1200
297
  },
298
  {
299
+ "epoch": 0.46,
300
+ "learning_rate": 9.240000000000001e-06,
301
+ "loss": 0.2757,
302
  "step": 1225
303
  },
304
  {
305
+ "epoch": 0.47,
306
+ "learning_rate": 9.213684210526316e-06,
307
+ "loss": 0.2595,
308
  "step": 1250
309
  },
310
  {
311
+ "epoch": 0.48,
312
+ "learning_rate": 9.187368421052633e-06,
313
+ "loss": 0.2589,
314
  "step": 1275
315
  },
316
  {
317
+ "epoch": 0.49,
318
+ "learning_rate": 9.161052631578948e-06,
319
+ "loss": 0.2851,
320
  "step": 1300
321
  },
322
  {
323
+ "epoch": 0.5,
324
+ "learning_rate": 9.134736842105265e-06,
325
+ "loss": 0.2528,
326
  "step": 1325
327
  },
328
  {
329
+ "epoch": 0.5,
330
+ "learning_rate": 9.10842105263158e-06,
331
+ "loss": 0.2596,
332
  "step": 1350
333
  },
334
  {
335
+ "epoch": 0.51,
336
+ "learning_rate": 9.082105263157896e-06,
337
+ "loss": 0.2529,
338
  "step": 1375
339
  },
340
  {
341
+ "epoch": 0.52,
342
+ "learning_rate": 9.055789473684211e-06,
343
+ "loss": 0.255,
344
  "step": 1400
345
  },
346
  {
347
+ "epoch": 0.53,
348
+ "learning_rate": 9.029473684210526e-06,
349
+ "loss": 0.2448,
350
  "step": 1425
351
  },
352
  {
353
+ "epoch": 0.54,
354
+ "learning_rate": 9.003157894736843e-06,
355
+ "loss": 0.2463,
356
  "step": 1450
357
  },
358
  {
359
+ "epoch": 0.55,
360
+ "learning_rate": 8.976842105263158e-06,
361
+ "loss": 0.2353,
362
  "step": 1475
363
  },
364
  {
365
+ "epoch": 0.56,
366
+ "learning_rate": 8.950526315789474e-06,
367
+ "loss": 0.276,
368
  "step": 1500
369
  },
370
  {
371
+ "epoch": 0.57,
372
+ "learning_rate": 8.92421052631579e-06,
373
+ "loss": 0.2342,
374
  "step": 1525
375
  },
376
  {
377
+ "epoch": 0.58,
378
+ "learning_rate": 8.897894736842106e-06,
379
+ "loss": 0.2423,
380
  "step": 1550
381
  },
382
  {
383
+ "epoch": 0.59,
384
+ "learning_rate": 8.871578947368423e-06,
385
+ "loss": 0.2332,
386
  "step": 1575
387
  },
388
  {
389
+ "epoch": 0.6,
390
+ "learning_rate": 8.845263157894738e-06,
391
+ "loss": 0.2464,
392
  "step": 1600
393
  },
394
  {
395
+ "epoch": 0.61,
396
+ "learning_rate": 8.818947368421054e-06,
397
+ "loss": 0.2404,
398
  "step": 1625
399
  },
400
  {
401
+ "epoch": 0.62,
402
+ "learning_rate": 8.79263157894737e-06,
403
+ "loss": 0.2332,
404
  "step": 1650
405
  },
406
  {
407
+ "epoch": 0.63,
408
+ "learning_rate": 8.766315789473684e-06,
409
+ "loss": 0.266,
410
  "step": 1675
411
  },
412
  {
413
+ "epoch": 0.64,
414
+ "learning_rate": 8.740000000000001e-06,
415
+ "loss": 0.2578,
416
  "step": 1700
417
  },
418
  {
419
+ "epoch": 0.65,
420
+ "learning_rate": 8.713684210526316e-06,
421
+ "loss": 0.2342,
422
  "step": 1725
423
  },
424
  {
425
+ "epoch": 0.65,
426
+ "learning_rate": 8.687368421052633e-06,
427
+ "loss": 0.2404,
428
  "step": 1750
429
  },
430
  {
431
+ "epoch": 0.66,
432
+ "learning_rate": 8.661052631578948e-06,
433
+ "loss": 0.2142,
434
  "step": 1775
435
  },
436
  {
437
+ "epoch": 0.67,
438
+ "learning_rate": 8.634736842105264e-06,
439
+ "loss": 0.2407,
440
  "step": 1800
441
  },
442
  {
443
+ "epoch": 0.68,
444
+ "learning_rate": 8.60842105263158e-06,
445
+ "loss": 0.2247,
446
  "step": 1825
447
  },
448
  {
449
+ "epoch": 0.69,
450
+ "learning_rate": 8.582105263157894e-06,
451
+ "loss": 0.2416,
452
  "step": 1850
453
  },
454
  {
455
+ "epoch": 0.7,
456
+ "learning_rate": 8.555789473684213e-06,
457
+ "loss": 0.2256,
458
  "step": 1875
459
  },
460
  {
461
+ "epoch": 0.71,
462
+ "learning_rate": 8.529473684210528e-06,
463
+ "loss": 0.2279,
464
  "step": 1900
465
  },
466
  {
467
+ "epoch": 0.72,
468
+ "learning_rate": 8.503157894736843e-06,
469
+ "loss": 0.2352,
470
  "step": 1925
471
  },
472
  {
473
+ "epoch": 0.73,
474
+ "learning_rate": 8.47684210526316e-06,
475
+ "loss": 0.2055,
476
  "step": 1950
477
  },
478
  {
479
+ "epoch": 0.74,
480
+ "learning_rate": 8.450526315789474e-06,
481
+ "loss": 0.2436,
482
  "step": 1975
483
  },
484
  {
485
+ "epoch": 0.75,
486
+ "learning_rate": 8.42421052631579e-06,
487
+ "loss": 0.2192,
 
 
 
 
 
 
 
 
 
488
  "step": 2000
489
  },
490
  {
491
+ "epoch": 0.76,
492
+ "learning_rate": 8.397894736842106e-06,
493
+ "loss": 0.2153,
494
  "step": 2025
495
  },
496
  {
497
+ "epoch": 0.77,
498
+ "learning_rate": 8.371578947368422e-06,
499
+ "loss": 0.2288,
500
  "step": 2050
501
  },
502
  {
503
+ "epoch": 0.78,
504
+ "learning_rate": 8.345263157894737e-06,
505
+ "loss": 0.2113,
506
  "step": 2075
507
  },
508
  {
509
+ "epoch": 0.79,
510
+ "learning_rate": 8.318947368421052e-06,
511
+ "loss": 0.22,
512
  "step": 2100
513
  },
514
  {
515
+ "epoch": 0.79,
516
+ "learning_rate": 8.292631578947369e-06,
517
+ "loss": 0.2353,
518
  "step": 2125
519
  },
520
  {
521
+ "epoch": 0.8,
522
+ "learning_rate": 8.266315789473684e-06,
523
+ "loss": 0.2166,
524
  "step": 2150
525
  },
526
  {
527
+ "epoch": 0.81,
528
+ "learning_rate": 8.24e-06,
529
+ "loss": 0.2308,
530
  "step": 2175
531
  },
532
  {
533
+ "epoch": 0.82,
534
+ "learning_rate": 8.213684210526316e-06,
535
+ "loss": 0.2223,
536
  "step": 2200
537
  },
538
  {
539
+ "epoch": 0.83,
540
+ "learning_rate": 8.187368421052632e-06,
541
+ "loss": 0.2062,
542
  "step": 2225
543
  },
544
  {
545
+ "epoch": 0.84,
546
+ "learning_rate": 8.161052631578949e-06,
547
+ "loss": 0.2152,
548
  "step": 2250
549
  },
550
  {
551
+ "epoch": 0.85,
552
+ "learning_rate": 8.134736842105264e-06,
553
+ "loss": 0.2102,
554
  "step": 2275
555
  },
556
  {
557
+ "epoch": 0.86,
558
+ "learning_rate": 8.10842105263158e-06,
559
+ "loss": 0.2109,
560
  "step": 2300
561
  },
562
  {
563
+ "epoch": 0.87,
564
+ "learning_rate": 8.082105263157896e-06,
565
+ "loss": 0.2088,
566
  "step": 2325
567
  },
568
  {
569
+ "epoch": 0.88,
570
+ "learning_rate": 8.05578947368421e-06,
571
+ "loss": 0.2251,
572
  "step": 2350
573
  },
574
  {
575
+ "epoch": 0.89,
576
+ "learning_rate": 8.029473684210527e-06,
577
+ "loss": 0.1999,
578
  "step": 2375
579
  },
580
  {
581
+ "epoch": 0.9,
582
+ "learning_rate": 8.003157894736842e-06,
583
+ "loss": 0.2096,
584
  "step": 2400
585
  },
586
  {
587
+ "epoch": 0.91,
588
+ "learning_rate": 7.976842105263159e-06,
589
+ "loss": 0.2063,
590
  "step": 2425
591
  },
592
  {
593
+ "epoch": 0.92,
594
+ "learning_rate": 7.950526315789474e-06,
595
+ "loss": 0.1951,
596
  "step": 2450
597
  },
598
  {
599
+ "epoch": 0.93,
600
+ "learning_rate": 7.92421052631579e-06,
601
+ "loss": 0.2071,
602
  "step": 2475
603
  },
604
  {
605
+ "epoch": 0.93,
606
+ "learning_rate": 7.897894736842106e-06,
607
+ "loss": 0.1938,
608
  "step": 2500
609
  },
610
  {
611
+ "epoch": 0.94,
612
+ "learning_rate": 7.87157894736842e-06,
613
+ "loss": 0.1833,
614
  "step": 2525
615
  },
616
  {
617
+ "epoch": 0.95,
618
+ "learning_rate": 7.845263157894739e-06,
619
+ "loss": 0.2094,
620
  "step": 2550
621
  },
622
  {
623
+ "epoch": 0.96,
624
+ "learning_rate": 7.818947368421054e-06,
625
+ "loss": 0.2129,
626
  "step": 2575
627
  },
628
  {
629
+ "epoch": 0.97,
630
+ "learning_rate": 7.792631578947369e-06,
631
+ "loss": 0.2211,
632
  "step": 2600
633
  },
634
  {
635
+ "epoch": 0.98,
636
+ "learning_rate": 7.766315789473685e-06,
637
+ "loss": 0.1998,
638
  "step": 2625
639
  },
640
  {
641
+ "epoch": 0.99,
642
+ "learning_rate": 7.74e-06,
643
+ "loss": 0.1936,
644
  "step": 2650
645
  },
646
  {
647
+ "epoch": 1.0,
648
+ "learning_rate": 7.713684210526317e-06,
649
+ "loss": 0.216,
650
  "step": 2675
651
  },
652
  {
653
+ "epoch": 1.01,
654
+ "learning_rate": 7.687368421052632e-06,
655
+ "loss": 0.1361,
656
  "step": 2700
657
  },
658
  {
659
+ "epoch": 1.02,
660
+ "learning_rate": 7.661052631578949e-06,
661
+ "loss": 0.1317,
662
  "step": 2725
663
  },
664
  {
665
+ "epoch": 1.03,
666
+ "learning_rate": 7.634736842105264e-06,
667
+ "loss": 0.1479,
668
  "step": 2750
669
  },
670
  {
671
+ "epoch": 1.04,
672
+ "learning_rate": 7.6084210526315795e-06,
673
+ "loss": 0.1223,
674
  "step": 2775
675
  },
676
  {
677
+ "epoch": 1.05,
678
+ "learning_rate": 7.582105263157895e-06,
679
+ "loss": 0.136,
680
  "step": 2800
681
  },
682
  {
683
+ "epoch": 1.06,
684
+ "learning_rate": 7.555789473684211e-06,
685
+ "loss": 0.1326,
686
  "step": 2825
687
  },
688
  {
689
+ "epoch": 1.07,
690
+ "learning_rate": 7.529473684210526e-06,
691
+ "loss": 0.1389,
692
  "step": 2850
693
  },
694
  {
695
+ "epoch": 1.08,
696
+ "learning_rate": 7.503157894736842e-06,
697
+ "loss": 0.1281,
698
  "step": 2875
699
  },
700
  {
701
+ "epoch": 1.08,
702
+ "learning_rate": 7.476842105263159e-06,
703
+ "loss": 0.1304,
704
  "step": 2900
705
  },
706
  {
707
+ "epoch": 1.09,
708
+ "learning_rate": 7.4505263157894744e-06,
709
+ "loss": 0.1369,
710
  "step": 2925
711
  },
712
  {
713
+ "epoch": 1.1,
714
+ "learning_rate": 7.42421052631579e-06,
715
+ "loss": 0.1298,
716
  "step": 2950
717
  },
718
  {
719
+ "epoch": 1.11,
720
+ "learning_rate": 7.397894736842106e-06,
721
+ "loss": 0.1381,
722
  "step": 2975
723
  },
724
  {
725
+ "epoch": 1.12,
726
+ "learning_rate": 7.371578947368422e-06,
727
+ "loss": 0.1295,
728
  "step": 3000
729
  },
730
  {
731
+ "epoch": 1.13,
732
+ "learning_rate": 7.345263157894738e-06,
733
+ "loss": 0.1207,
734
+ "step": 3025
735
+ },
736
+ {
737
+ "epoch": 1.14,
738
+ "learning_rate": 7.3189473684210535e-06,
739
+ "loss": 0.1275,
740
+ "step": 3050
741
+ },
742
+ {
743
+ "epoch": 1.15,
744
+ "learning_rate": 7.292631578947369e-06,
745
+ "loss": 0.1276,
746
+ "step": 3075
747
+ },
748
+ {
749
+ "epoch": 1.16,
750
+ "learning_rate": 7.266315789473684e-06,
751
+ "loss": 0.1249,
752
+ "step": 3100
753
+ },
754
+ {
755
+ "epoch": 1.17,
756
+ "learning_rate": 7.24e-06,
757
+ "loss": 0.1247,
758
+ "step": 3125
759
+ },
760
+ {
761
+ "epoch": 1.18,
762
+ "learning_rate": 7.213684210526316e-06,
763
+ "loss": 0.1284,
764
+ "step": 3150
765
+ },
766
+ {
767
+ "epoch": 1.19,
768
+ "learning_rate": 7.187368421052632e-06,
769
+ "loss": 0.1253,
770
+ "step": 3175
771
+ },
772
+ {
773
+ "epoch": 1.2,
774
+ "learning_rate": 7.161052631578948e-06,
775
+ "loss": 0.134,
776
+ "step": 3200
777
+ },
778
+ {
779
+ "epoch": 1.21,
780
+ "learning_rate": 7.134736842105264e-06,
781
+ "loss": 0.1452,
782
+ "step": 3225
783
+ },
784
+ {
785
+ "epoch": 1.22,
786
+ "learning_rate": 7.10842105263158e-06,
787
+ "loss": 0.1579,
788
+ "step": 3250
789
+ },
790
+ {
791
+ "epoch": 1.22,
792
+ "learning_rate": 7.082105263157896e-06,
793
+ "loss": 0.1282,
794
+ "step": 3275
795
+ },
796
+ {
797
+ "epoch": 1.23,
798
+ "learning_rate": 7.055789473684212e-06,
799
+ "loss": 0.1314,
800
+ "step": 3300
801
+ },
802
+ {
803
+ "epoch": 1.24,
804
+ "learning_rate": 7.0294736842105275e-06,
805
+ "loss": 0.115,
806
+ "step": 3325
807
+ },
808
+ {
809
+ "epoch": 1.25,
810
+ "learning_rate": 7.0031578947368425e-06,
811
+ "loss": 0.137,
812
+ "step": 3350
813
+ },
814
+ {
815
+ "epoch": 1.26,
816
+ "learning_rate": 6.976842105263158e-06,
817
+ "loss": 0.1145,
818
+ "step": 3375
819
+ },
820
+ {
821
+ "epoch": 1.27,
822
+ "learning_rate": 6.950526315789474e-06,
823
+ "loss": 0.1378,
824
+ "step": 3400
825
+ },
826
+ {
827
+ "epoch": 1.28,
828
+ "learning_rate": 6.92421052631579e-06,
829
+ "loss": 0.1218,
830
+ "step": 3425
831
+ },
832
+ {
833
+ "epoch": 1.29,
834
+ "learning_rate": 6.897894736842106e-06,
835
+ "loss": 0.1218,
836
+ "step": 3450
837
+ },
838
+ {
839
+ "epoch": 1.3,
840
+ "learning_rate": 6.871578947368422e-06,
841
+ "loss": 0.118,
842
+ "step": 3475
843
+ },
844
+ {
845
+ "epoch": 1.31,
846
+ "learning_rate": 6.845263157894737e-06,
847
+ "loss": 0.1192,
848
+ "step": 3500
849
+ },
850
+ {
851
+ "epoch": 1.32,
852
+ "learning_rate": 6.818947368421052e-06,
853
+ "loss": 0.1162,
854
+ "step": 3525
855
+ },
856
+ {
857
+ "epoch": 1.33,
858
+ "learning_rate": 6.792631578947368e-06,
859
+ "loss": 0.1225,
860
+ "step": 3550
861
+ },
862
+ {
863
+ "epoch": 1.34,
864
+ "learning_rate": 6.766315789473685e-06,
865
+ "loss": 0.1302,
866
+ "step": 3575
867
+ },
868
+ {
869
+ "epoch": 1.35,
870
+ "learning_rate": 6.740000000000001e-06,
871
+ "loss": 0.1425,
872
+ "step": 3600
873
+ },
874
+ {
875
+ "epoch": 1.36,
876
+ "learning_rate": 6.7136842105263165e-06,
877
+ "loss": 0.1166,
878
+ "step": 3625
879
+ },
880
+ {
881
+ "epoch": 1.36,
882
+ "learning_rate": 6.687368421052632e-06,
883
+ "loss": 0.1307,
884
+ "step": 3650
885
+ },
886
+ {
887
+ "epoch": 1.37,
888
+ "learning_rate": 6.661052631578948e-06,
889
+ "loss": 0.1366,
890
+ "step": 3675
891
+ },
892
+ {
893
+ "epoch": 1.38,
894
+ "learning_rate": 6.634736842105264e-06,
895
+ "loss": 0.1209,
896
+ "step": 3700
897
+ },
898
+ {
899
+ "epoch": 1.39,
900
+ "learning_rate": 6.60842105263158e-06,
901
+ "loss": 0.1359,
902
+ "step": 3725
903
+ },
904
+ {
905
+ "epoch": 1.4,
906
+ "learning_rate": 6.582105263157896e-06,
907
+ "loss": 0.1288,
908
+ "step": 3750
909
+ },
910
+ {
911
+ "epoch": 1.41,
912
+ "learning_rate": 6.5557894736842106e-06,
913
+ "loss": 0.1205,
914
+ "step": 3775
915
+ },
916
+ {
917
+ "epoch": 1.42,
918
+ "learning_rate": 6.529473684210526e-06,
919
+ "loss": 0.122,
920
+ "step": 3800
921
+ },
922
+ {
923
+ "epoch": 1.43,
924
+ "learning_rate": 6.503157894736842e-06,
925
+ "loss": 0.1242,
926
+ "step": 3825
927
+ },
928
+ {
929
+ "epoch": 1.44,
930
+ "learning_rate": 6.476842105263158e-06,
931
+ "loss": 0.1156,
932
+ "step": 3850
933
+ },
934
+ {
935
+ "epoch": 1.45,
936
+ "learning_rate": 6.450526315789474e-06,
937
+ "loss": 0.1233,
938
+ "step": 3875
939
+ },
940
+ {
941
+ "epoch": 1.46,
942
+ "learning_rate": 6.42421052631579e-06,
943
+ "loss": 0.1171,
944
+ "step": 3900
945
+ },
946
+ {
947
+ "epoch": 1.47,
948
+ "learning_rate": 6.397894736842106e-06,
949
+ "loss": 0.1294,
950
+ "step": 3925
951
+ },
952
+ {
953
+ "epoch": 1.48,
954
+ "learning_rate": 6.371578947368422e-06,
955
+ "loss": 0.1217,
956
+ "step": 3950
957
+ },
958
+ {
959
+ "epoch": 1.49,
960
+ "learning_rate": 6.345263157894738e-06,
961
+ "loss": 0.1305,
962
+ "step": 3975
963
+ },
964
+ {
965
+ "epoch": 1.5,
966
+ "learning_rate": 6.318947368421054e-06,
967
+ "loss": 0.1166,
968
+ "step": 4000
969
+ },
970
+ {
971
+ "epoch": 1.51,
972
+ "learning_rate": 6.292631578947369e-06,
973
+ "loss": 0.1159,
974
+ "step": 4025
975
+ },
976
+ {
977
+ "epoch": 1.51,
978
+ "learning_rate": 6.266315789473685e-06,
979
+ "loss": 0.1193,
980
+ "step": 4050
981
+ },
982
+ {
983
+ "epoch": 1.52,
984
+ "learning_rate": 6.24e-06,
985
+ "loss": 0.1286,
986
+ "step": 4075
987
+ },
988
+ {
989
+ "epoch": 1.53,
990
+ "learning_rate": 6.213684210526316e-06,
991
+ "loss": 0.1237,
992
+ "step": 4100
993
+ },
994
+ {
995
+ "epoch": 1.54,
996
+ "learning_rate": 6.187368421052632e-06,
997
+ "loss": 0.1299,
998
+ "step": 4125
999
+ },
1000
+ {
1001
+ "epoch": 1.55,
1002
+ "learning_rate": 6.161052631578948e-06,
1003
+ "loss": 0.1286,
1004
+ "step": 4150
1005
+ },
1006
+ {
1007
+ "epoch": 1.56,
1008
+ "learning_rate": 6.134736842105264e-06,
1009
+ "loss": 0.1458,
1010
+ "step": 4175
1011
+ },
1012
+ {
1013
+ "epoch": 1.57,
1014
+ "learning_rate": 6.108421052631579e-06,
1015
+ "loss": 0.133,
1016
+ "step": 4200
1017
+ },
1018
+ {
1019
+ "epoch": 1.58,
1020
+ "learning_rate": 6.0821052631578945e-06,
1021
+ "loss": 0.1091,
1022
+ "step": 4225
1023
+ },
1024
+ {
1025
+ "epoch": 1.59,
1026
+ "learning_rate": 6.055789473684212e-06,
1027
+ "loss": 0.1205,
1028
+ "step": 4250
1029
+ },
1030
+ {
1031
+ "epoch": 1.6,
1032
+ "learning_rate": 6.029473684210527e-06,
1033
+ "loss": 0.1151,
1034
+ "step": 4275
1035
+ },
1036
+ {
1037
+ "epoch": 1.61,
1038
+ "learning_rate": 6.003157894736843e-06,
1039
+ "loss": 0.119,
1040
+ "step": 4300
1041
+ },
1042
+ {
1043
+ "epoch": 1.62,
1044
+ "learning_rate": 5.976842105263159e-06,
1045
+ "loss": 0.1374,
1046
+ "step": 4325
1047
+ },
1048
+ {
1049
+ "epoch": 1.63,
1050
+ "learning_rate": 5.950526315789474e-06,
1051
+ "loss": 0.1307,
1052
+ "step": 4350
1053
+ },
1054
+ {
1055
+ "epoch": 1.64,
1056
+ "learning_rate": 5.92421052631579e-06,
1057
+ "loss": 0.1207,
1058
+ "step": 4375
1059
+ },
1060
+ {
1061
+ "epoch": 1.65,
1062
+ "learning_rate": 5.897894736842106e-06,
1063
+ "loss": 0.1104,
1064
+ "step": 4400
1065
+ },
1066
+ {
1067
+ "epoch": 1.65,
1068
+ "learning_rate": 5.871578947368422e-06,
1069
+ "loss": 0.1074,
1070
+ "step": 4425
1071
+ },
1072
+ {
1073
+ "epoch": 1.66,
1074
+ "learning_rate": 5.845263157894737e-06,
1075
+ "loss": 0.1256,
1076
+ "step": 4450
1077
+ },
1078
+ {
1079
+ "epoch": 1.67,
1080
+ "learning_rate": 5.818947368421053e-06,
1081
+ "loss": 0.1345,
1082
+ "step": 4475
1083
+ },
1084
+ {
1085
+ "epoch": 1.68,
1086
+ "learning_rate": 5.7926315789473685e-06,
1087
+ "loss": 0.1171,
1088
+ "step": 4500
1089
+ },
1090
+ {
1091
+ "epoch": 1.69,
1092
+ "learning_rate": 5.766315789473684e-06,
1093
+ "loss": 0.1166,
1094
+ "step": 4525
1095
+ },
1096
+ {
1097
+ "epoch": 1.7,
1098
+ "learning_rate": 5.74e-06,
1099
+ "loss": 0.1242,
1100
+ "step": 4550
1101
+ },
1102
+ {
1103
+ "epoch": 1.71,
1104
+ "learning_rate": 5.713684210526316e-06,
1105
+ "loss": 0.1148,
1106
+ "step": 4575
1107
+ },
1108
+ {
1109
+ "epoch": 1.72,
1110
+ "learning_rate": 5.687368421052633e-06,
1111
+ "loss": 0.12,
1112
+ "step": 4600
1113
+ },
1114
+ {
1115
+ "epoch": 1.73,
1116
+ "learning_rate": 5.661052631578948e-06,
1117
+ "loss": 0.1255,
1118
+ "step": 4625
1119
+ },
1120
+ {
1121
+ "epoch": 1.74,
1122
+ "learning_rate": 5.634736842105264e-06,
1123
+ "loss": 0.1083,
1124
+ "step": 4650
1125
+ },
1126
+ {
1127
+ "epoch": 1.75,
1128
+ "learning_rate": 5.60842105263158e-06,
1129
+ "loss": 0.1199,
1130
+ "step": 4675
1131
+ },
1132
+ {
1133
+ "epoch": 1.76,
1134
+ "learning_rate": 5.582105263157895e-06,
1135
+ "loss": 0.1183,
1136
+ "step": 4700
1137
+ },
1138
+ {
1139
+ "epoch": 1.77,
1140
+ "learning_rate": 5.555789473684211e-06,
1141
+ "loss": 0.1213,
1142
+ "step": 4725
1143
+ },
1144
+ {
1145
+ "epoch": 1.78,
1146
+ "learning_rate": 5.529473684210527e-06,
1147
+ "loss": 0.132,
1148
+ "step": 4750
1149
+ },
1150
+ {
1151
+ "epoch": 1.79,
1152
+ "learning_rate": 5.5031578947368425e-06,
1153
+ "loss": 0.112,
1154
+ "step": 4775
1155
+ },
1156
+ {
1157
+ "epoch": 1.8,
1158
+ "learning_rate": 5.476842105263158e-06,
1159
+ "loss": 0.107,
1160
+ "step": 4800
1161
+ },
1162
+ {
1163
+ "epoch": 1.8,
1164
+ "learning_rate": 5.450526315789474e-06,
1165
+ "loss": 0.1277,
1166
+ "step": 4825
1167
+ },
1168
+ {
1169
+ "epoch": 1.81,
1170
+ "learning_rate": 5.42421052631579e-06,
1171
+ "loss": 0.1206,
1172
+ "step": 4850
1173
+ },
1174
+ {
1175
+ "epoch": 1.82,
1176
+ "learning_rate": 5.397894736842105e-06,
1177
+ "loss": 0.1144,
1178
+ "step": 4875
1179
+ },
1180
+ {
1181
+ "epoch": 1.83,
1182
+ "learning_rate": 5.371578947368421e-06,
1183
+ "loss": 0.1301,
1184
+ "step": 4900
1185
+ },
1186
+ {
1187
+ "epoch": 1.84,
1188
+ "learning_rate": 5.345263157894738e-06,
1189
+ "loss": 0.1204,
1190
+ "step": 4925
1191
+ },
1192
+ {
1193
+ "epoch": 1.85,
1194
+ "learning_rate": 5.318947368421053e-06,
1195
+ "loss": 0.1301,
1196
+ "step": 4950
1197
+ },
1198
+ {
1199
+ "epoch": 1.86,
1200
+ "learning_rate": 5.292631578947369e-06,
1201
+ "loss": 0.108,
1202
+ "step": 4975
1203
+ },
1204
+ {
1205
+ "epoch": 1.87,
1206
+ "learning_rate": 5.266315789473685e-06,
1207
+ "loss": 0.1186,
1208
+ "step": 5000
1209
+ },
1210
+ {
1211
+ "epoch": 1.87,
1212
+ "eval_loss": 0.15749630331993103,
1213
+ "eval_runtime": 5122.7824,
1214
+ "eval_samples_per_second": 2.134,
1215
+ "eval_steps_per_second": 0.267,
1216
+ "eval_wer": 511.77750577995374,
1217
+ "step": 5000
1218
+ },
1219
+ {
1220
+ "epoch": 1.88,
1221
+ "learning_rate": 5.240000000000001e-06,
1222
+ "loss": 0.1077,
1223
+ "step": 5025
1224
+ },
1225
+ {
1226
+ "epoch": 1.89,
1227
+ "learning_rate": 5.2136842105263165e-06,
1228
+ "loss": 0.1183,
1229
+ "step": 5050
1230
+ },
1231
+ {
1232
+ "epoch": 1.9,
1233
+ "learning_rate": 5.187368421052632e-06,
1234
+ "loss": 0.122,
1235
+ "step": 5075
1236
+ },
1237
+ {
1238
+ "epoch": 1.91,
1239
+ "learning_rate": 5.161052631578948e-06,
1240
+ "loss": 0.1202,
1241
+ "step": 5100
1242
+ },
1243
+ {
1244
+ "epoch": 1.92,
1245
+ "learning_rate": 5.134736842105263e-06,
1246
+ "loss": 0.127,
1247
+ "step": 5125
1248
+ },
1249
+ {
1250
+ "epoch": 1.93,
1251
+ "learning_rate": 5.108421052631579e-06,
1252
+ "loss": 0.1197,
1253
+ "step": 5150
1254
+ },
1255
+ {
1256
+ "epoch": 1.94,
1257
+ "learning_rate": 5.082105263157895e-06,
1258
+ "loss": 0.1142,
1259
+ "step": 5175
1260
+ },
1261
+ {
1262
+ "epoch": 1.94,
1263
+ "learning_rate": 5.0557894736842105e-06,
1264
+ "loss": 0.1361,
1265
+ "step": 5200
1266
+ },
1267
+ {
1268
+ "epoch": 1.95,
1269
+ "learning_rate": 5.030526315789475e-06,
1270
+ "loss": 0.1096,
1271
+ "step": 5225
1272
+ },
1273
+ {
1274
+ "epoch": 1.96,
1275
+ "learning_rate": 5.0042105263157906e-06,
1276
+ "loss": 0.1158,
1277
+ "step": 5250
1278
+ },
1279
+ {
1280
+ "epoch": 1.97,
1281
+ "learning_rate": 4.9778947368421055e-06,
1282
+ "loss": 0.1218,
1283
+ "step": 5275
1284
+ },
1285
+ {
1286
+ "epoch": 1.98,
1287
+ "learning_rate": 4.951578947368421e-06,
1288
+ "loss": 0.1121,
1289
+ "step": 5300
1290
+ },
1291
+ {
1292
+ "epoch": 1.99,
1293
+ "learning_rate": 4.925263157894737e-06,
1294
+ "loss": 0.1224,
1295
+ "step": 5325
1296
+ },
1297
+ {
1298
+ "epoch": 2.0,
1299
+ "learning_rate": 4.898947368421053e-06,
1300
+ "loss": 0.1031,
1301
+ "step": 5350
1302
+ },
1303
+ {
1304
+ "epoch": 2.01,
1305
+ "learning_rate": 4.872631578947369e-06,
1306
+ "loss": 0.0677,
1307
+ "step": 5375
1308
+ },
1309
+ {
1310
+ "epoch": 2.02,
1311
+ "learning_rate": 4.846315789473685e-06,
1312
+ "loss": 0.0618,
1313
+ "step": 5400
1314
+ },
1315
+ {
1316
+ "epoch": 2.03,
1317
+ "learning_rate": 4.8200000000000004e-06,
1318
+ "loss": 0.06,
1319
+ "step": 5425
1320
+ },
1321
+ {
1322
+ "epoch": 2.04,
1323
+ "learning_rate": 4.793684210526316e-06,
1324
+ "loss": 0.0526,
1325
+ "step": 5450
1326
+ },
1327
+ {
1328
+ "epoch": 2.05,
1329
+ "learning_rate": 4.767368421052632e-06,
1330
+ "loss": 0.0642,
1331
+ "step": 5475
1332
+ },
1333
+ {
1334
+ "epoch": 2.06,
1335
+ "learning_rate": 4.741052631578948e-06,
1336
+ "loss": 0.0702,
1337
+ "step": 5500
1338
+ },
1339
+ {
1340
+ "epoch": 2.07,
1341
+ "learning_rate": 4.714736842105264e-06,
1342
+ "loss": 0.0583,
1343
+ "step": 5525
1344
+ },
1345
+ {
1346
+ "epoch": 2.08,
1347
+ "learning_rate": 4.6884210526315795e-06,
1348
+ "loss": 0.0608,
1349
+ "step": 5550
1350
+ },
1351
+ {
1352
+ "epoch": 2.08,
1353
+ "learning_rate": 4.662105263157895e-06,
1354
+ "loss": 0.0633,
1355
+ "step": 5575
1356
+ },
1357
+ {
1358
+ "epoch": 2.09,
1359
+ "learning_rate": 4.63578947368421e-06,
1360
+ "loss": 0.0572,
1361
+ "step": 5600
1362
+ },
1363
+ {
1364
+ "epoch": 2.1,
1365
+ "learning_rate": 4.609473684210526e-06,
1366
+ "loss": 0.0644,
1367
+ "step": 5625
1368
+ },
1369
+ {
1370
+ "epoch": 2.11,
1371
+ "learning_rate": 4.583157894736843e-06,
1372
+ "loss": 0.0573,
1373
+ "step": 5650
1374
+ },
1375
+ {
1376
+ "epoch": 2.12,
1377
+ "learning_rate": 4.556842105263159e-06,
1378
+ "loss": 0.0568,
1379
+ "step": 5675
1380
+ },
1381
+ {
1382
+ "epoch": 2.13,
1383
+ "learning_rate": 4.5305263157894744e-06,
1384
+ "loss": 0.0675,
1385
+ "step": 5700
1386
+ },
1387
+ {
1388
+ "epoch": 2.14,
1389
+ "learning_rate": 4.504210526315789e-06,
1390
+ "loss": 0.0729,
1391
+ "step": 5725
1392
+ },
1393
+ {
1394
+ "epoch": 2.15,
1395
+ "learning_rate": 4.477894736842105e-06,
1396
+ "loss": 0.0606,
1397
+ "step": 5750
1398
+ },
1399
+ {
1400
+ "epoch": 2.16,
1401
+ "learning_rate": 4.451578947368421e-06,
1402
+ "loss": 0.0535,
1403
+ "step": 5775
1404
+ },
1405
+ {
1406
+ "epoch": 2.17,
1407
+ "learning_rate": 4.425263157894737e-06,
1408
+ "loss": 0.061,
1409
+ "step": 5800
1410
+ },
1411
+ {
1412
+ "epoch": 2.18,
1413
+ "learning_rate": 4.3989473684210535e-06,
1414
+ "loss": 0.0597,
1415
+ "step": 5825
1416
+ },
1417
+ {
1418
+ "epoch": 2.19,
1419
+ "learning_rate": 4.3726315789473685e-06,
1420
+ "loss": 0.0621,
1421
+ "step": 5850
1422
+ },
1423
+ {
1424
+ "epoch": 2.2,
1425
+ "learning_rate": 4.346315789473684e-06,
1426
+ "loss": 0.0603,
1427
+ "step": 5875
1428
+ },
1429
+ {
1430
+ "epoch": 2.21,
1431
+ "learning_rate": 4.32e-06,
1432
+ "loss": 0.0523,
1433
+ "step": 5900
1434
+ },
1435
+ {
1436
+ "epoch": 2.22,
1437
+ "learning_rate": 4.293684210526316e-06,
1438
+ "loss": 0.0629,
1439
+ "step": 5925
1440
+ },
1441
+ {
1442
+ "epoch": 2.23,
1443
+ "learning_rate": 4.267368421052632e-06,
1444
+ "loss": 0.0578,
1445
+ "step": 5950
1446
+ },
1447
+ {
1448
+ "epoch": 2.23,
1449
+ "learning_rate": 4.241052631578948e-06,
1450
+ "loss": 0.0644,
1451
+ "step": 5975
1452
+ },
1453
+ {
1454
+ "epoch": 2.24,
1455
+ "learning_rate": 4.214736842105263e-06,
1456
+ "loss": 0.0685,
1457
+ "step": 6000
1458
+ },
1459
+ {
1460
+ "epoch": 2.25,
1461
+ "learning_rate": 4.188421052631579e-06,
1462
+ "loss": 0.0671,
1463
+ "step": 6025
1464
+ },
1465
+ {
1466
+ "epoch": 2.26,
1467
+ "learning_rate": 4.162105263157895e-06,
1468
+ "loss": 0.0534,
1469
+ "step": 6050
1470
+ },
1471
+ {
1472
+ "epoch": 2.27,
1473
+ "learning_rate": 4.135789473684211e-06,
1474
+ "loss": 0.0619,
1475
+ "step": 6075
1476
+ },
1477
+ {
1478
+ "epoch": 2.28,
1479
+ "learning_rate": 4.109473684210527e-06,
1480
+ "loss": 0.0612,
1481
+ "step": 6100
1482
+ },
1483
+ {
1484
+ "epoch": 2.29,
1485
+ "learning_rate": 4.0831578947368425e-06,
1486
+ "loss": 0.0648,
1487
+ "step": 6125
1488
+ },
1489
+ {
1490
+ "epoch": 2.3,
1491
+ "learning_rate": 4.056842105263158e-06,
1492
+ "loss": 0.0572,
1493
+ "step": 6150
1494
+ },
1495
+ {
1496
+ "epoch": 2.31,
1497
+ "learning_rate": 4.030526315789474e-06,
1498
+ "loss": 0.0556,
1499
+ "step": 6175
1500
+ },
1501
+ {
1502
+ "epoch": 2.32,
1503
+ "learning_rate": 4.00421052631579e-06,
1504
+ "loss": 0.0592,
1505
+ "step": 6200
1506
+ },
1507
+ {
1508
+ "epoch": 2.33,
1509
+ "learning_rate": 3.977894736842106e-06,
1510
+ "loss": 0.0592,
1511
+ "step": 6225
1512
+ },
1513
+ {
1514
+ "epoch": 2.34,
1515
+ "learning_rate": 3.951578947368422e-06,
1516
+ "loss": 0.0612,
1517
+ "step": 6250
1518
+ },
1519
+ {
1520
+ "epoch": 2.35,
1521
+ "learning_rate": 3.9252631578947366e-06,
1522
+ "loss": 0.055,
1523
+ "step": 6275
1524
+ },
1525
+ {
1526
+ "epoch": 2.36,
1527
+ "learning_rate": 3.898947368421052e-06,
1528
+ "loss": 0.059,
1529
+ "step": 6300
1530
+ },
1531
+ {
1532
+ "epoch": 2.37,
1533
+ "learning_rate": 3.872631578947369e-06,
1534
+ "loss": 0.0643,
1535
+ "step": 6325
1536
+ },
1537
+ {
1538
+ "epoch": 2.37,
1539
+ "learning_rate": 3.846315789473685e-06,
1540
+ "loss": 0.0639,
1541
+ "step": 6350
1542
+ },
1543
+ {
1544
+ "epoch": 2.38,
1545
+ "learning_rate": 3.820000000000001e-06,
1546
+ "loss": 0.0472,
1547
+ "step": 6375
1548
+ },
1549
+ {
1550
+ "epoch": 2.39,
1551
+ "learning_rate": 3.793684210526316e-06,
1552
+ "loss": 0.0661,
1553
+ "step": 6400
1554
+ },
1555
+ {
1556
+ "epoch": 2.4,
1557
+ "learning_rate": 3.767368421052632e-06,
1558
+ "loss": 0.0604,
1559
+ "step": 6425
1560
+ },
1561
+ {
1562
+ "epoch": 2.41,
1563
+ "learning_rate": 3.7410526315789473e-06,
1564
+ "loss": 0.0654,
1565
+ "step": 6450
1566
+ },
1567
+ {
1568
+ "epoch": 2.42,
1569
+ "learning_rate": 3.714736842105263e-06,
1570
+ "loss": 0.0639,
1571
+ "step": 6475
1572
+ },
1573
+ {
1574
+ "epoch": 2.43,
1575
+ "learning_rate": 3.6884210526315794e-06,
1576
+ "loss": 0.0586,
1577
+ "step": 6500
1578
+ },
1579
+ {
1580
+ "epoch": 2.44,
1581
+ "learning_rate": 3.662105263157895e-06,
1582
+ "loss": 0.0566,
1583
+ "step": 6525
1584
+ },
1585
+ {
1586
+ "epoch": 2.45,
1587
+ "learning_rate": 3.635789473684211e-06,
1588
+ "loss": 0.056,
1589
+ "step": 6550
1590
+ },
1591
+ {
1592
+ "epoch": 2.46,
1593
+ "learning_rate": 3.6094736842105264e-06,
1594
+ "loss": 0.0592,
1595
+ "step": 6575
1596
+ },
1597
+ {
1598
+ "epoch": 2.47,
1599
+ "learning_rate": 3.5831578947368422e-06,
1600
+ "loss": 0.0585,
1601
+ "step": 6600
1602
+ },
1603
+ {
1604
+ "epoch": 2.48,
1605
+ "learning_rate": 3.556842105263158e-06,
1606
+ "loss": 0.0549,
1607
+ "step": 6625
1608
+ },
1609
+ {
1610
+ "epoch": 2.49,
1611
+ "learning_rate": 3.5305263157894743e-06,
1612
+ "loss": 0.0658,
1613
+ "step": 6650
1614
+ },
1615
+ {
1616
+ "epoch": 2.5,
1617
+ "learning_rate": 3.50421052631579e-06,
1618
+ "loss": 0.062,
1619
+ "step": 6675
1620
+ },
1621
+ {
1622
+ "epoch": 2.51,
1623
+ "learning_rate": 3.4778947368421055e-06,
1624
+ "loss": 0.0572,
1625
+ "step": 6700
1626
+ },
1627
+ {
1628
+ "epoch": 2.51,
1629
+ "learning_rate": 3.4515789473684213e-06,
1630
+ "loss": 0.0592,
1631
+ "step": 6725
1632
+ },
1633
+ {
1634
+ "epoch": 2.52,
1635
+ "learning_rate": 3.425263157894737e-06,
1636
+ "loss": 0.0732,
1637
+ "step": 6750
1638
+ },
1639
+ {
1640
+ "epoch": 2.53,
1641
+ "learning_rate": 3.398947368421053e-06,
1642
+ "loss": 0.0605,
1643
+ "step": 6775
1644
+ },
1645
+ {
1646
+ "epoch": 2.54,
1647
+ "learning_rate": 3.3726315789473683e-06,
1648
+ "loss": 0.065,
1649
+ "step": 6800
1650
+ },
1651
+ {
1652
+ "epoch": 2.55,
1653
+ "learning_rate": 3.3463157894736846e-06,
1654
+ "loss": 0.0698,
1655
+ "step": 6825
1656
+ },
1657
+ {
1658
+ "epoch": 2.56,
1659
+ "learning_rate": 3.3200000000000004e-06,
1660
+ "loss": 0.0638,
1661
+ "step": 6850
1662
+ },
1663
+ {
1664
+ "epoch": 2.57,
1665
+ "learning_rate": 3.2936842105263162e-06,
1666
+ "loss": 0.0588,
1667
+ "step": 6875
1668
+ },
1669
+ {
1670
+ "epoch": 2.58,
1671
+ "learning_rate": 3.267368421052632e-06,
1672
+ "loss": 0.0659,
1673
+ "step": 6900
1674
+ },
1675
+ {
1676
+ "epoch": 2.59,
1677
+ "learning_rate": 3.2410526315789474e-06,
1678
+ "loss": 0.0635,
1679
+ "step": 6925
1680
+ },
1681
+ {
1682
+ "epoch": 2.6,
1683
+ "learning_rate": 3.2147368421052633e-06,
1684
+ "loss": 0.0614,
1685
+ "step": 6950
1686
+ },
1687
+ {
1688
+ "epoch": 2.61,
1689
+ "learning_rate": 3.188421052631579e-06,
1690
+ "loss": 0.062,
1691
+ "step": 6975
1692
+ },
1693
+ {
1694
+ "epoch": 2.62,
1695
+ "learning_rate": 3.1621052631578953e-06,
1696
+ "loss": 0.0657,
1697
+ "step": 7000
1698
+ },
1699
+ {
1700
+ "epoch": 2.63,
1701
+ "learning_rate": 3.135789473684211e-06,
1702
+ "loss": 0.0565,
1703
+ "step": 7025
1704
+ },
1705
+ {
1706
+ "epoch": 2.64,
1707
+ "learning_rate": 3.1094736842105265e-06,
1708
+ "loss": 0.0571,
1709
+ "step": 7050
1710
+ },
1711
+ {
1712
+ "epoch": 2.65,
1713
+ "learning_rate": 3.0831578947368423e-06,
1714
+ "loss": 0.0526,
1715
+ "step": 7075
1716
+ },
1717
+ {
1718
+ "epoch": 2.66,
1719
+ "learning_rate": 3.056842105263158e-06,
1720
+ "loss": 0.051,
1721
+ "step": 7100
1722
+ },
1723
+ {
1724
+ "epoch": 2.66,
1725
+ "learning_rate": 3.0305263157894736e-06,
1726
+ "loss": 0.0562,
1727
+ "step": 7125
1728
+ },
1729
+ {
1730
+ "epoch": 2.67,
1731
+ "learning_rate": 3.0042105263157894e-06,
1732
+ "loss": 0.0685,
1733
+ "step": 7150
1734
+ },
1735
+ {
1736
+ "epoch": 2.68,
1737
+ "learning_rate": 2.9778947368421056e-06,
1738
+ "loss": 0.0615,
1739
+ "step": 7175
1740
+ },
1741
+ {
1742
+ "epoch": 2.69,
1743
+ "learning_rate": 2.9515789473684214e-06,
1744
+ "loss": 0.0615,
1745
+ "step": 7200
1746
+ },
1747
+ {
1748
+ "epoch": 2.7,
1749
+ "learning_rate": 2.9252631578947373e-06,
1750
+ "loss": 0.0665,
1751
+ "step": 7225
1752
+ },
1753
+ {
1754
+ "epoch": 2.71,
1755
+ "learning_rate": 2.8989473684210526e-06,
1756
+ "loss": 0.06,
1757
+ "step": 7250
1758
+ },
1759
+ {
1760
+ "epoch": 2.72,
1761
+ "learning_rate": 2.8726315789473685e-06,
1762
+ "loss": 0.0534,
1763
+ "step": 7275
1764
+ },
1765
+ {
1766
+ "epoch": 2.73,
1767
+ "learning_rate": 2.8463157894736843e-06,
1768
+ "loss": 0.0576,
1769
+ "step": 7300
1770
+ },
1771
+ {
1772
+ "epoch": 2.74,
1773
+ "learning_rate": 2.82e-06,
1774
+ "loss": 0.0543,
1775
+ "step": 7325
1776
+ },
1777
+ {
1778
+ "epoch": 2.75,
1779
+ "learning_rate": 2.7936842105263163e-06,
1780
+ "loss": 0.064,
1781
+ "step": 7350
1782
+ },
1783
+ {
1784
+ "epoch": 2.76,
1785
+ "learning_rate": 2.7673684210526317e-06,
1786
+ "loss": 0.0601,
1787
+ "step": 7375
1788
+ },
1789
+ {
1790
+ "epoch": 2.77,
1791
+ "learning_rate": 2.7410526315789476e-06,
1792
+ "loss": 0.0516,
1793
+ "step": 7400
1794
+ },
1795
+ {
1796
+ "epoch": 2.78,
1797
+ "learning_rate": 2.7147368421052634e-06,
1798
+ "loss": 0.0587,
1799
+ "step": 7425
1800
+ },
1801
+ {
1802
+ "epoch": 2.79,
1803
+ "learning_rate": 2.688421052631579e-06,
1804
+ "loss": 0.0566,
1805
+ "step": 7450
1806
+ },
1807
+ {
1808
+ "epoch": 2.8,
1809
+ "learning_rate": 2.6621052631578946e-06,
1810
+ "loss": 0.0542,
1811
+ "step": 7475
1812
+ },
1813
+ {
1814
+ "epoch": 2.8,
1815
+ "learning_rate": 2.635789473684211e-06,
1816
+ "loss": 0.0576,
1817
+ "step": 7500
1818
+ },
1819
+ {
1820
+ "epoch": 2.81,
1821
+ "learning_rate": 2.6094736842105267e-06,
1822
+ "loss": 0.0526,
1823
+ "step": 7525
1824
+ },
1825
+ {
1826
+ "epoch": 2.82,
1827
+ "learning_rate": 2.5831578947368425e-06,
1828
+ "loss": 0.0574,
1829
+ "step": 7550
1830
+ },
1831
+ {
1832
+ "epoch": 2.83,
1833
+ "learning_rate": 2.5568421052631583e-06,
1834
+ "loss": 0.049,
1835
+ "step": 7575
1836
+ },
1837
+ {
1838
+ "epoch": 2.84,
1839
+ "learning_rate": 2.5305263157894737e-06,
1840
+ "loss": 0.0624,
1841
+ "step": 7600
1842
+ },
1843
+ {
1844
+ "epoch": 2.85,
1845
+ "learning_rate": 2.5042105263157895e-06,
1846
+ "loss": 0.0589,
1847
+ "step": 7625
1848
+ },
1849
+ {
1850
+ "epoch": 2.86,
1851
+ "learning_rate": 2.4778947368421053e-06,
1852
+ "loss": 0.062,
1853
+ "step": 7650
1854
+ },
1855
+ {
1856
+ "epoch": 2.87,
1857
+ "learning_rate": 2.451578947368421e-06,
1858
+ "loss": 0.0629,
1859
+ "step": 7675
1860
+ },
1861
+ {
1862
+ "epoch": 2.88,
1863
+ "learning_rate": 2.425263157894737e-06,
1864
+ "loss": 0.0626,
1865
+ "step": 7700
1866
+ },
1867
+ {
1868
+ "epoch": 2.89,
1869
+ "learning_rate": 2.3989473684210528e-06,
1870
+ "loss": 0.0569,
1871
+ "step": 7725
1872
+ },
1873
+ {
1874
+ "epoch": 2.9,
1875
+ "learning_rate": 2.3726315789473686e-06,
1876
+ "loss": 0.0636,
1877
+ "step": 7750
1878
+ },
1879
+ {
1880
+ "epoch": 2.91,
1881
+ "learning_rate": 2.3463157894736844e-06,
1882
+ "loss": 0.0519,
1883
+ "step": 7775
1884
+ },
1885
+ {
1886
+ "epoch": 2.92,
1887
+ "learning_rate": 2.3200000000000002e-06,
1888
+ "loss": 0.0525,
1889
+ "step": 7800
1890
+ },
1891
+ {
1892
+ "epoch": 2.93,
1893
+ "learning_rate": 2.293684210526316e-06,
1894
+ "loss": 0.0576,
1895
+ "step": 7825
1896
+ },
1897
+ {
1898
+ "epoch": 2.94,
1899
+ "learning_rate": 2.267368421052632e-06,
1900
+ "loss": 0.063,
1901
+ "step": 7850
1902
+ },
1903
+ {
1904
+ "epoch": 2.95,
1905
+ "learning_rate": 2.2410526315789473e-06,
1906
+ "loss": 0.0494,
1907
+ "step": 7875
1908
+ },
1909
+ {
1910
+ "epoch": 2.95,
1911
+ "learning_rate": 2.2147368421052635e-06,
1912
+ "loss": 0.0614,
1913
+ "step": 7900
1914
+ },
1915
+ {
1916
+ "epoch": 2.96,
1917
+ "learning_rate": 2.188421052631579e-06,
1918
+ "loss": 0.0579,
1919
+ "step": 7925
1920
+ },
1921
+ {
1922
+ "epoch": 2.97,
1923
+ "learning_rate": 2.1621052631578947e-06,
1924
+ "loss": 0.0525,
1925
+ "step": 7950
1926
+ },
1927
+ {
1928
+ "epoch": 2.98,
1929
+ "learning_rate": 2.135789473684211e-06,
1930
+ "loss": 0.0496,
1931
+ "step": 7975
1932
+ },
1933
+ {
1934
+ "epoch": 2.99,
1935
+ "learning_rate": 2.1094736842105264e-06,
1936
+ "loss": 0.052,
1937
+ "step": 8000
1938
+ },
1939
+ {
1940
+ "epoch": 3.0,
1941
+ "learning_rate": 2.083157894736842e-06,
1942
+ "loss": 0.0587,
1943
+ "step": 8025
1944
+ },
1945
+ {
1946
+ "epoch": 3.01,
1947
+ "learning_rate": 2.056842105263158e-06,
1948
+ "loss": 0.0285,
1949
+ "step": 8050
1950
+ },
1951
+ {
1952
+ "epoch": 3.02,
1953
+ "learning_rate": 2.030526315789474e-06,
1954
+ "loss": 0.0319,
1955
+ "step": 8075
1956
+ },
1957
+ {
1958
+ "epoch": 3.03,
1959
+ "learning_rate": 2.0042105263157896e-06,
1960
+ "loss": 0.0286,
1961
+ "step": 8100
1962
+ },
1963
+ {
1964
+ "epoch": 3.04,
1965
+ "learning_rate": 1.9778947368421055e-06,
1966
+ "loss": 0.0301,
1967
+ "step": 8125
1968
+ },
1969
+ {
1970
+ "epoch": 3.05,
1971
+ "learning_rate": 1.9515789473684213e-06,
1972
+ "loss": 0.0299,
1973
+ "step": 8150
1974
+ },
1975
+ {
1976
+ "epoch": 3.06,
1977
+ "learning_rate": 1.925263157894737e-06,
1978
+ "loss": 0.0294,
1979
+ "step": 8175
1980
+ },
1981
+ {
1982
+ "epoch": 3.07,
1983
+ "learning_rate": 1.8989473684210527e-06,
1984
+ "loss": 0.0307,
1985
+ "step": 8200
1986
+ },
1987
+ {
1988
+ "epoch": 3.08,
1989
+ "learning_rate": 1.8726315789473687e-06,
1990
+ "loss": 0.0337,
1991
+ "step": 8225
1992
+ },
1993
+ {
1994
+ "epoch": 3.09,
1995
+ "learning_rate": 1.8463157894736843e-06,
1996
+ "loss": 0.0298,
1997
+ "step": 8250
1998
+ },
1999
+ {
2000
+ "epoch": 3.09,
2001
+ "learning_rate": 1.8200000000000002e-06,
2002
+ "loss": 0.024,
2003
+ "step": 8275
2004
+ },
2005
+ {
2006
+ "epoch": 3.1,
2007
+ "learning_rate": 1.7936842105263158e-06,
2008
+ "loss": 0.028,
2009
+ "step": 8300
2010
+ },
2011
+ {
2012
+ "epoch": 3.11,
2013
+ "learning_rate": 1.7673684210526318e-06,
2014
+ "loss": 0.0269,
2015
+ "step": 8325
2016
+ },
2017
+ {
2018
+ "epoch": 3.12,
2019
+ "learning_rate": 1.7410526315789474e-06,
2020
+ "loss": 0.0283,
2021
+ "step": 8350
2022
+ },
2023
+ {
2024
+ "epoch": 3.13,
2025
+ "learning_rate": 1.7147368421052632e-06,
2026
+ "loss": 0.0283,
2027
+ "step": 8375
2028
+ },
2029
+ {
2030
+ "epoch": 3.14,
2031
+ "learning_rate": 1.6884210526315792e-06,
2032
+ "loss": 0.0293,
2033
+ "step": 8400
2034
+ },
2035
+ {
2036
+ "epoch": 3.15,
2037
+ "learning_rate": 1.6621052631578948e-06,
2038
+ "loss": 0.0343,
2039
+ "step": 8425
2040
+ },
2041
+ {
2042
+ "epoch": 3.16,
2043
+ "learning_rate": 1.6357894736842107e-06,
2044
+ "loss": 0.0305,
2045
+ "step": 8450
2046
+ },
2047
+ {
2048
+ "epoch": 3.17,
2049
+ "learning_rate": 1.6094736842105265e-06,
2050
+ "loss": 0.0248,
2051
+ "step": 8475
2052
+ },
2053
+ {
2054
+ "epoch": 3.18,
2055
+ "learning_rate": 1.5831578947368423e-06,
2056
+ "loss": 0.0307,
2057
+ "step": 8500
2058
+ },
2059
+ {
2060
+ "epoch": 3.19,
2061
+ "learning_rate": 1.556842105263158e-06,
2062
+ "loss": 0.0263,
2063
+ "step": 8525
2064
+ },
2065
+ {
2066
+ "epoch": 3.2,
2067
+ "learning_rate": 1.5305263157894737e-06,
2068
+ "loss": 0.0314,
2069
+ "step": 8550
2070
+ },
2071
+ {
2072
+ "epoch": 3.21,
2073
+ "learning_rate": 1.5042105263157898e-06,
2074
+ "loss": 0.0298,
2075
+ "step": 8575
2076
+ },
2077
+ {
2078
+ "epoch": 3.22,
2079
+ "learning_rate": 1.4778947368421054e-06,
2080
+ "loss": 0.0239,
2081
+ "step": 8600
2082
+ },
2083
+ {
2084
+ "epoch": 3.23,
2085
+ "learning_rate": 1.4515789473684212e-06,
2086
+ "loss": 0.0329,
2087
+ "step": 8625
2088
+ },
2089
+ {
2090
+ "epoch": 3.23,
2091
+ "learning_rate": 1.425263157894737e-06,
2092
+ "loss": 0.0296,
2093
+ "step": 8650
2094
+ },
2095
+ {
2096
+ "epoch": 3.24,
2097
+ "learning_rate": 1.3989473684210528e-06,
2098
+ "loss": 0.0298,
2099
+ "step": 8675
2100
+ },
2101
+ {
2102
+ "epoch": 3.25,
2103
+ "learning_rate": 1.3726315789473684e-06,
2104
+ "loss": 0.0247,
2105
+ "step": 8700
2106
+ },
2107
+ {
2108
+ "epoch": 3.26,
2109
+ "learning_rate": 1.3463157894736842e-06,
2110
+ "loss": 0.0284,
2111
+ "step": 8725
2112
+ },
2113
+ {
2114
+ "epoch": 3.27,
2115
+ "learning_rate": 1.32e-06,
2116
+ "loss": 0.0322,
2117
+ "step": 8750
2118
+ },
2119
+ {
2120
+ "epoch": 3.28,
2121
+ "learning_rate": 1.2936842105263159e-06,
2122
+ "loss": 0.0285,
2123
+ "step": 8775
2124
+ },
2125
+ {
2126
+ "epoch": 3.29,
2127
+ "learning_rate": 1.2673684210526315e-06,
2128
+ "loss": 0.0287,
2129
+ "step": 8800
2130
+ },
2131
+ {
2132
+ "epoch": 3.3,
2133
+ "learning_rate": 1.2410526315789475e-06,
2134
+ "loss": 0.0317,
2135
+ "step": 8825
2136
+ },
2137
+ {
2138
+ "epoch": 3.31,
2139
+ "learning_rate": 1.2147368421052633e-06,
2140
+ "loss": 0.0278,
2141
+ "step": 8850
2142
+ },
2143
+ {
2144
+ "epoch": 3.32,
2145
+ "learning_rate": 1.188421052631579e-06,
2146
+ "loss": 0.029,
2147
+ "step": 8875
2148
+ },
2149
+ {
2150
+ "epoch": 3.33,
2151
+ "learning_rate": 1.1621052631578948e-06,
2152
+ "loss": 0.0285,
2153
+ "step": 8900
2154
+ },
2155
+ {
2156
+ "epoch": 3.34,
2157
+ "learning_rate": 1.1357894736842106e-06,
2158
+ "loss": 0.0257,
2159
+ "step": 8925
2160
+ },
2161
+ {
2162
+ "epoch": 3.35,
2163
+ "learning_rate": 1.1094736842105264e-06,
2164
+ "loss": 0.026,
2165
+ "step": 8950
2166
+ },
2167
+ {
2168
+ "epoch": 3.36,
2169
+ "learning_rate": 1.0831578947368422e-06,
2170
+ "loss": 0.0303,
2171
+ "step": 8975
2172
+ },
2173
+ {
2174
+ "epoch": 3.37,
2175
+ "learning_rate": 1.0568421052631578e-06,
2176
+ "loss": 0.029,
2177
+ "step": 9000
2178
+ },
2179
+ {
2180
+ "epoch": 3.38,
2181
+ "learning_rate": 1.0305263157894739e-06,
2182
+ "loss": 0.0282,
2183
+ "step": 9025
2184
+ },
2185
+ {
2186
+ "epoch": 3.38,
2187
+ "learning_rate": 1.0042105263157897e-06,
2188
+ "loss": 0.029,
2189
+ "step": 9050
2190
+ },
2191
+ {
2192
+ "epoch": 3.39,
2193
+ "learning_rate": 9.778947368421053e-07,
2194
+ "loss": 0.0252,
2195
+ "step": 9075
2196
+ },
2197
+ {
2198
+ "epoch": 3.4,
2199
+ "learning_rate": 9.515789473684212e-07,
2200
+ "loss": 0.0273,
2201
+ "step": 9100
2202
+ },
2203
+ {
2204
+ "epoch": 3.41,
2205
+ "learning_rate": 9.252631578947368e-07,
2206
+ "loss": 0.0316,
2207
+ "step": 9125
2208
+ },
2209
+ {
2210
+ "epoch": 3.42,
2211
+ "learning_rate": 8.989473684210527e-07,
2212
+ "loss": 0.0276,
2213
+ "step": 9150
2214
+ },
2215
+ {
2216
+ "epoch": 3.43,
2217
+ "learning_rate": 8.726315789473686e-07,
2218
+ "loss": 0.0287,
2219
+ "step": 9175
2220
+ },
2221
+ {
2222
+ "epoch": 3.44,
2223
+ "learning_rate": 8.463157894736843e-07,
2224
+ "loss": 0.028,
2225
+ "step": 9200
2226
+ },
2227
+ {
2228
+ "epoch": 3.45,
2229
+ "learning_rate": 8.200000000000001e-07,
2230
+ "loss": 0.0247,
2231
+ "step": 9225
2232
+ },
2233
+ {
2234
+ "epoch": 3.46,
2235
+ "learning_rate": 7.936842105263158e-07,
2236
+ "loss": 0.0288,
2237
+ "step": 9250
2238
+ },
2239
+ {
2240
+ "epoch": 3.47,
2241
+ "learning_rate": 7.673684210526316e-07,
2242
+ "loss": 0.026,
2243
+ "step": 9275
2244
+ },
2245
+ {
2246
+ "epoch": 3.48,
2247
+ "learning_rate": 7.410526315789475e-07,
2248
+ "loss": 0.0304,
2249
+ "step": 9300
2250
+ },
2251
+ {
2252
+ "epoch": 3.49,
2253
+ "learning_rate": 7.147368421052632e-07,
2254
+ "loss": 0.0283,
2255
+ "step": 9325
2256
+ },
2257
+ {
2258
+ "epoch": 3.5,
2259
+ "learning_rate": 6.884210526315791e-07,
2260
+ "loss": 0.0264,
2261
+ "step": 9350
2262
+ },
2263
+ {
2264
+ "epoch": 3.51,
2265
+ "learning_rate": 6.621052631578948e-07,
2266
+ "loss": 0.033,
2267
+ "step": 9375
2268
+ },
2269
+ {
2270
+ "epoch": 3.52,
2271
+ "learning_rate": 6.357894736842106e-07,
2272
+ "loss": 0.0276,
2273
+ "step": 9400
2274
+ },
2275
+ {
2276
+ "epoch": 3.52,
2277
+ "learning_rate": 6.094736842105263e-07,
2278
+ "loss": 0.0285,
2279
+ "step": 9425
2280
+ },
2281
+ {
2282
+ "epoch": 3.53,
2283
+ "learning_rate": 5.831578947368421e-07,
2284
+ "loss": 0.0279,
2285
+ "step": 9450
2286
+ },
2287
+ {
2288
+ "epoch": 3.54,
2289
+ "learning_rate": 5.56842105263158e-07,
2290
+ "loss": 0.0273,
2291
+ "step": 9475
2292
+ },
2293
+ {
2294
+ "epoch": 3.55,
2295
+ "learning_rate": 5.305263157894737e-07,
2296
+ "loss": 0.0225,
2297
+ "step": 9500
2298
+ },
2299
+ {
2300
+ "epoch": 3.56,
2301
+ "learning_rate": 5.042105263157895e-07,
2302
+ "loss": 0.0253,
2303
+ "step": 9525
2304
+ },
2305
+ {
2306
+ "epoch": 3.57,
2307
+ "learning_rate": 4.778947368421053e-07,
2308
+ "loss": 0.0306,
2309
+ "step": 9550
2310
+ },
2311
+ {
2312
+ "epoch": 3.58,
2313
+ "learning_rate": 4.5157894736842107e-07,
2314
+ "loss": 0.0294,
2315
+ "step": 9575
2316
+ },
2317
+ {
2318
+ "epoch": 3.59,
2319
+ "learning_rate": 4.2526315789473684e-07,
2320
+ "loss": 0.0277,
2321
+ "step": 9600
2322
+ },
2323
+ {
2324
+ "epoch": 3.6,
2325
+ "learning_rate": 3.9894736842105266e-07,
2326
+ "loss": 0.031,
2327
+ "step": 9625
2328
+ },
2329
+ {
2330
+ "epoch": 3.61,
2331
+ "learning_rate": 3.726315789473685e-07,
2332
+ "loss": 0.031,
2333
+ "step": 9650
2334
+ },
2335
+ {
2336
+ "epoch": 3.62,
2337
+ "learning_rate": 3.4631578947368424e-07,
2338
+ "loss": 0.0271,
2339
+ "step": 9675
2340
+ },
2341
+ {
2342
+ "epoch": 3.63,
2343
+ "learning_rate": 3.2e-07,
2344
+ "loss": 0.024,
2345
+ "step": 9700
2346
+ },
2347
+ {
2348
+ "epoch": 3.64,
2349
+ "learning_rate": 2.936842105263158e-07,
2350
+ "loss": 0.0283,
2351
+ "step": 9725
2352
+ },
2353
+ {
2354
+ "epoch": 3.65,
2355
+ "learning_rate": 2.6736842105263164e-07,
2356
+ "loss": 0.0241,
2357
+ "step": 9750
2358
+ },
2359
+ {
2360
+ "epoch": 3.66,
2361
+ "learning_rate": 2.410526315789474e-07,
2362
+ "loss": 0.0273,
2363
+ "step": 9775
2364
+ },
2365
+ {
2366
+ "epoch": 3.66,
2367
+ "learning_rate": 2.1473684210526317e-07,
2368
+ "loss": 0.0315,
2369
+ "step": 9800
2370
+ },
2371
+ {
2372
+ "epoch": 3.67,
2373
+ "learning_rate": 1.8842105263157897e-07,
2374
+ "loss": 0.0286,
2375
+ "step": 9825
2376
+ },
2377
+ {
2378
+ "epoch": 3.68,
2379
+ "learning_rate": 1.6210526315789476e-07,
2380
+ "loss": 0.0274,
2381
+ "step": 9850
2382
+ },
2383
+ {
2384
+ "epoch": 3.69,
2385
+ "learning_rate": 1.3578947368421055e-07,
2386
+ "loss": 0.0307,
2387
+ "step": 9875
2388
+ },
2389
+ {
2390
+ "epoch": 3.7,
2391
+ "learning_rate": 1.0947368421052632e-07,
2392
+ "loss": 0.0296,
2393
+ "step": 9900
2394
+ },
2395
+ {
2396
+ "epoch": 3.71,
2397
+ "learning_rate": 8.315789473684211e-08,
2398
+ "loss": 0.0277,
2399
+ "step": 9925
2400
+ },
2401
+ {
2402
+ "epoch": 3.72,
2403
+ "learning_rate": 5.68421052631579e-08,
2404
+ "loss": 0.0261,
2405
+ "step": 9950
2406
+ },
2407
+ {
2408
+ "epoch": 3.73,
2409
+ "learning_rate": 3.0526315789473686e-08,
2410
+ "loss": 0.025,
2411
+ "step": 9975
2412
+ },
2413
+ {
2414
+ "epoch": 3.74,
2415
+ "learning_rate": 4.210526315789474e-09,
2416
+ "loss": 0.0237,
2417
+ "step": 10000
2418
+ },
2419
+ {
2420
+ "epoch": 3.74,
2421
+ "eval_loss": 0.1606837958097458,
2422
+ "eval_runtime": 4871.4962,
2423
+ "eval_samples_per_second": 2.244,
2424
+ "eval_steps_per_second": 0.281,
2425
+ "eval_wer": 455.00475996192034,
2426
+ "step": 10000
2427
  }
2428
  ],
2429
+ "max_steps": 10000,
2430
  "num_train_epochs": 4,
2431
+ "total_flos": 4.61693352701952e+19,
2432
  "trial_name": null,
2433
  "trial_params": null
2434
  }