tndklab committed on
Commit
27aea00
1 Parent(s): 6418845

Model save

Browse files
Files changed (2) hide show
  1. README.md +38 -38
  2. trainer_state.json +425 -425
README.md CHANGED
@@ -17,9 +17,9 @@ should probably proofread and complete it, then remove this comment. -->
17
 
18
  This model is a fine-tuned version of [jonatasgrosman/wav2vec2-large-xlsr-53-japanese](https://huggingface.co/jonatasgrosman/wav2vec2-large-xlsr-53-japanese) on the None dataset.
19
  It achieves the following results on the evaluation set:
20
- - Loss: 0.0042
21
- - Wer: 0.1814
22
- - Cer: 0.1410
23
 
24
  ## Model description
25
 
@@ -51,41 +51,41 @@ The following hyperparameters were used during training:
51
 
52
  | Training Loss | Epoch | Step | Validation Loss | Wer | Cer |
53
  |:-------------:|:-----:|:----:|:---------------:|:------:|:------:|
54
- | 14.8616 | 1.0 | 60 | 13.4101 | 0.9940 | 1.1374 |
55
- | 4.7182 | 2.0 | 120 | 3.6739 | 0.9906 | 0.9961 |
56
- | 2.6113 | 3.0 | 180 | 2.2707 | 1.0 | 0.8933 |
57
- | 1.6113 | 4.0 | 240 | 1.3777 | 0.9996 | 0.7362 |
58
- | 1.0685 | 5.0 | 300 | 0.8246 | 0.8156 | 0.5875 |
59
- | 0.8227 | 6.0 | 360 | 0.5762 | 0.7032 | 0.4400 |
60
- | 0.6503 | 7.0 | 420 | 0.4838 | 0.6341 | 0.3888 |
61
- | 0.5194 | 8.0 | 480 | 0.3765 | 0.5834 | 0.3683 |
62
- | 0.4697 | 9.0 | 540 | 0.2801 | 0.4421 | 0.2695 |
63
- | 0.3755 | 10.0 | 600 | 0.2182 | 0.3629 | 0.2027 |
64
- | 0.2933 | 11.0 | 660 | 0.1622 | 0.3182 | 0.2007 |
65
- | 0.2956 | 12.0 | 720 | 0.1057 | 0.2637 | 0.1789 |
66
- | 0.3003 | 13.0 | 780 | 0.0687 | 0.2393 | 0.1626 |
67
- | 0.1756 | 14.0 | 840 | 0.0452 | 0.2107 | 0.1464 |
68
- | 0.1416 | 15.0 | 900 | 0.0311 | 0.2074 | 0.1504 |
69
- | 0.1371 | 16.0 | 960 | 0.0250 | 0.2029 | 0.1348 |
70
- | 0.1739 | 17.0 | 1020 | 0.0231 | 0.2002 | 0.1484 |
71
- | 0.1035 | 18.0 | 1080 | 0.0176 | 0.1965 | 0.1441 |
72
- | 0.104 | 19.0 | 1140 | 0.0138 | 0.1897 | 0.1647 |
73
- | 0.1195 | 20.0 | 1200 | 0.0145 | 0.1916 | 0.1325 |
74
- | 0.0982 | 21.0 | 1260 | 0.0107 | 0.1893 | 0.1321 |
75
- | 0.0755 | 22.0 | 1320 | 0.0097 | 0.1871 | 0.1293 |
76
- | 0.0702 | 23.0 | 1380 | 0.0084 | 0.1860 | 0.1407 |
77
- | 0.044 | 24.0 | 1440 | 0.0066 | 0.1826 | 0.1417 |
78
- | 0.0735 | 25.0 | 1500 | 0.0074 | 0.1848 | 0.1418 |
79
- | 0.0736 | 26.0 | 1560 | 0.0069 | 0.1860 | 0.1418 |
80
- | 0.0714 | 27.0 | 1620 | 0.0055 | 0.1837 | 0.1394 |
81
- | 0.062 | 28.0 | 1680 | 0.0047 | 0.1833 | 0.1441 |
82
- | 0.0497 | 29.0 | 1740 | 0.0048 | 0.1829 | 0.1441 |
83
- | 0.0482 | 30.0 | 1800 | 0.0045 | 0.1814 | 0.1476 |
84
- | 0.072 | 31.0 | 1860 | 0.0041 | 0.1818 | 0.1487 |
85
- | 0.0443 | 32.0 | 1920 | 0.0044 | 0.1811 | 0.1457 |
86
- | 0.0494 | 33.0 | 1980 | 0.0046 | 0.1818 | 0.1430 |
87
- | 0.053 | 34.0 | 2040 | 0.0043 | 0.1814 | 0.1418 |
88
- | 0.0465 | 35.0 | 2100 | 0.0042 | 0.1814 | 0.1410 |
89
 
90
 
91
  ### Framework versions
 
17
 
18
  This model is a fine-tuned version of [jonatasgrosman/wav2vec2-large-xlsr-53-japanese](https://huggingface.co/jonatasgrosman/wav2vec2-large-xlsr-53-japanese) on the None dataset.
19
  It achieves the following results on the evaluation set:
20
+ - Loss: 0.0032
21
+ - Wer: 0.1835
22
+ - Cer: 0.1557
23
 
24
  ## Model description
25
 
 
51
 
52
  | Training Loss | Epoch | Step | Validation Loss | Wer | Cer |
53
  |:-------------:|:-----:|:----:|:---------------:|:------:|:------:|
54
+ | 14.5102 | 1.0 | 60 | 13.5418 | 1.0011 | 1.2966 |
55
+ | 6.2989 | 2.0 | 120 | 4.6126 | 1.0 | 0.8736 |
56
+ | 2.208 | 3.0 | 180 | 1.7112 | 0.9993 | 0.8160 |
57
+ | 1.098 | 4.0 | 240 | 0.8765 | 0.8410 | 0.5698 |
58
+ | 0.8201 | 5.0 | 300 | 0.6964 | 0.8221 | 0.5340 |
59
+ | 0.7499 | 6.0 | 360 | 0.6299 | 0.8217 | 0.5305 |
60
+ | 0.6753 | 7.0 | 420 | 0.5998 | 0.7691 | 0.4482 |
61
+ | 0.6003 | 8.0 | 480 | 0.5502 | 0.7394 | 0.4564 |
62
+ | 0.5732 | 9.0 | 540 | 0.5047 | 0.7098 | 0.3906 |
63
+ | 0.5404 | 10.0 | 600 | 0.4694 | 0.6679 | 0.3283 |
64
+ | 0.4889 | 11.0 | 660 | 0.3979 | 0.6379 | 0.3017 |
65
+ | 0.4401 | 12.0 | 720 | 0.3255 | 0.5849 | 0.2792 |
66
+ | 0.4295 | 13.0 | 780 | 0.2853 | 0.5044 | 0.2772 |
67
+ | 0.3216 | 14.0 | 840 | 0.2204 | 0.4511 | 0.2234 |
68
+ | 0.2583 | 15.0 | 900 | 0.1492 | 0.3929 | 0.2130 |
69
+ | 0.226 | 16.0 | 960 | 0.1007 | 0.2817 | 0.1690 |
70
+ | 0.2304 | 17.0 | 1020 | 0.0694 | 0.2439 | 0.1699 |
71
+ | 0.1487 | 18.0 | 1080 | 0.0471 | 0.2142 | 0.1758 |
72
+ | 0.1045 | 19.0 | 1140 | 0.0305 | 0.2168 | 0.1686 |
73
+ | 0.1104 | 20.0 | 1200 | 0.0256 | 0.2072 | 0.1625 |
74
+ | 0.094 | 21.0 | 1260 | 0.0226 | 0.2272 | 0.1760 |
75
+ | 0.0987 | 22.0 | 1320 | 0.0129 | 0.2013 | 0.1900 |
76
+ | 0.0753 | 23.0 | 1380 | 0.0110 | 0.2053 | 0.1786 |
77
+ | 0.0544 | 24.0 | 1440 | 0.0091 | 0.1909 | 0.1858 |
78
+ | 0.0684 | 25.0 | 1500 | 0.0083 | 0.1901 | 0.1728 |
79
+ | 0.0723 | 26.0 | 1560 | 0.0083 | 0.2027 | 0.1854 |
80
+ | 0.061 | 27.0 | 1620 | 0.0061 | 0.2020 | 0.1779 |
81
+ | 0.0635 | 28.0 | 1680 | 0.0059 | 0.1964 | 0.1818 |
82
+ | 0.0336 | 29.0 | 1740 | 0.0048 | 0.1887 | 0.1574 |
83
+ | 0.0455 | 30.0 | 1800 | 0.0036 | 0.1842 | 0.1694 |
84
+ | 0.0672 | 31.0 | 1860 | 0.0038 | 0.1838 | 0.1507 |
85
+ | 0.0315 | 32.0 | 1920 | 0.0033 | 0.1853 | 0.1555 |
86
+ | 0.0466 | 33.0 | 1980 | 0.0033 | 0.1827 | 0.1569 |
87
+ | 0.0491 | 34.0 | 2040 | 0.0035 | 0.1835 | 0.1556 |
88
+ | 0.0315 | 35.0 | 2100 | 0.0032 | 0.1835 | 0.1557 |
89
 
90
 
91
  ### Framework versions
trainer_state.json CHANGED
@@ -11,1620 +11,1620 @@
11
  {
12
  "epoch": 0.17,
13
  "learning_rate": 5.000000000000001e-07,
14
- "loss": 17.1856,
15
  "step": 10
16
  },
17
  {
18
  "epoch": 0.33,
19
  "learning_rate": 1.0000000000000002e-06,
20
- "loss": 17.1183,
21
  "step": 20
22
  },
23
  {
24
  "epoch": 0.5,
25
  "learning_rate": 1.5e-06,
26
- "loss": 16.9174,
27
  "step": 30
28
  },
29
  {
30
  "epoch": 0.67,
31
  "learning_rate": 2.0000000000000003e-06,
32
- "loss": 16.5532,
33
  "step": 40
34
  },
35
  {
36
  "epoch": 0.83,
37
  "learning_rate": 2.5e-06,
38
- "loss": 16.119,
39
  "step": 50
40
  },
41
  {
42
  "epoch": 1.0,
43
  "learning_rate": 3e-06,
44
- "loss": 14.8616,
45
  "step": 60
46
  },
47
  {
48
  "epoch": 1.0,
49
- "eval_cer": 1.1373958689946366,
50
- "eval_loss": 13.410123825073242,
51
- "eval_runtime": 6.9171,
52
- "eval_samples_per_second": 69.393,
53
- "eval_steps_per_second": 2.169,
54
- "eval_wer": 0.9939894815927873,
55
  "step": 60
56
  },
57
  {
58
  "epoch": 1.17,
59
  "learning_rate": 3.5000000000000004e-06,
60
- "loss": 13.5255,
61
  "step": 70
62
  },
63
  {
64
  "epoch": 1.33,
65
  "learning_rate": 4.000000000000001e-06,
66
- "loss": 11.433,
67
  "step": 80
68
  },
69
  {
70
  "epoch": 1.5,
71
  "learning_rate": 4.5e-06,
72
- "loss": 9.9334,
73
  "step": 90
74
  },
75
  {
76
  "epoch": 1.67,
77
  "learning_rate": 5e-06,
78
- "loss": 7.2352,
79
  "step": 100
80
  },
81
  {
82
  "epoch": 1.83,
83
  "learning_rate": 5.500000000000001e-06,
84
- "loss": 5.6212,
85
  "step": 110
86
  },
87
  {
88
  "epoch": 2.0,
89
  "learning_rate": 6e-06,
90
- "loss": 4.7182,
91
  "step": 120
92
  },
93
  {
94
  "epoch": 2.0,
95
- "eval_cer": 0.9961200502111149,
96
- "eval_loss": 3.6738953590393066,
97
- "eval_runtime": 7.3084,
98
- "eval_samples_per_second": 65.678,
99
- "eval_steps_per_second": 2.052,
100
- "eval_wer": 0.9906085649887303,
101
  "step": 120
102
  },
103
  {
104
  "epoch": 2.17,
105
  "learning_rate": 6.5000000000000004e-06,
106
- "loss": 3.6015,
107
  "step": 130
108
  },
109
  {
110
  "epoch": 2.33,
111
  "learning_rate": 7.000000000000001e-06,
112
- "loss": 3.5234,
113
  "step": 140
114
  },
115
  {
116
  "epoch": 2.5,
117
  "learning_rate": 7.5e-06,
118
- "loss": 3.098,
119
  "step": 150
120
  },
121
  {
122
  "epoch": 2.67,
123
  "learning_rate": 8.000000000000001e-06,
124
- "loss": 2.9318,
125
  "step": 160
126
  },
127
  {
128
  "epoch": 2.83,
129
  "learning_rate": 8.500000000000002e-06,
130
- "loss": 2.7208,
131
  "step": 170
132
  },
133
  {
134
  "epoch": 3.0,
135
  "learning_rate": 9e-06,
136
- "loss": 2.6113,
137
  "step": 180
138
  },
139
  {
140
  "epoch": 3.0,
141
- "eval_cer": 0.8933013808056601,
142
- "eval_loss": 2.27065110206604,
143
- "eval_runtime": 6.9792,
144
- "eval_samples_per_second": 68.775,
145
- "eval_steps_per_second": 2.149,
146
- "eval_wer": 1.0,
147
  "step": 180
148
  },
149
  {
150
  "epoch": 3.17,
151
  "learning_rate": 9.5e-06,
152
- "loss": 2.3861,
153
  "step": 190
154
  },
155
  {
156
  "epoch": 3.33,
157
  "learning_rate": 1e-05,
158
- "loss": 2.1747,
159
  "step": 200
160
  },
161
  {
162
  "epoch": 3.5,
163
  "learning_rate": 1.05e-05,
164
- "loss": 2.1219,
165
  "step": 210
166
  },
167
  {
168
  "epoch": 3.67,
169
  "learning_rate": 1.1000000000000001e-05,
170
- "loss": 1.8921,
171
  "step": 220
172
  },
173
  {
174
  "epoch": 3.83,
175
  "learning_rate": 1.1500000000000002e-05,
176
- "loss": 1.7117,
177
  "step": 230
178
  },
179
  {
180
  "epoch": 4.0,
181
  "learning_rate": 1.2e-05,
182
- "loss": 1.6113,
183
  "step": 240
184
  },
185
  {
186
  "epoch": 4.0,
187
- "eval_cer": 0.7361634143558142,
188
- "eval_loss": 1.3776856660842896,
189
- "eval_runtime": 6.9999,
190
- "eval_samples_per_second": 68.573,
191
- "eval_steps_per_second": 2.143,
192
- "eval_wer": 0.9996243425995492,
193
  "step": 240
194
  },
195
  {
196
  "epoch": 4.17,
197
  "learning_rate": 1.25e-05,
198
- "loss": 1.6571,
199
  "step": 250
200
  },
201
  {
202
  "epoch": 4.33,
203
  "learning_rate": 1.3000000000000001e-05,
204
- "loss": 1.4174,
205
  "step": 260
206
  },
207
  {
208
  "epoch": 4.5,
209
  "learning_rate": 1.3500000000000001e-05,
210
- "loss": 1.3487,
211
  "step": 270
212
  },
213
  {
214
  "epoch": 4.67,
215
  "learning_rate": 1.4000000000000001e-05,
216
- "loss": 1.2325,
217
  "step": 280
218
  },
219
  {
220
  "epoch": 4.83,
221
  "learning_rate": 1.45e-05,
222
- "loss": 1.1461,
223
  "step": 290
224
  },
225
  {
226
  "epoch": 5.0,
227
  "learning_rate": 1.5e-05,
228
- "loss": 1.0685,
229
  "step": 300
230
  },
231
  {
232
  "epoch": 5.0,
233
- "eval_cer": 0.5874700445053064,
234
- "eval_loss": 0.8246225714683533,
235
- "eval_runtime": 6.9834,
236
- "eval_samples_per_second": 68.734,
237
- "eval_steps_per_second": 2.148,
238
- "eval_wer": 0.8155522163786627,
239
  "step": 300
240
  },
241
  {
242
  "epoch": 5.17,
243
  "learning_rate": 1.55e-05,
244
- "loss": 1.0175,
245
  "step": 310
246
  },
247
  {
248
  "epoch": 5.33,
249
  "learning_rate": 1.6000000000000003e-05,
250
- "loss": 0.9392,
251
  "step": 320
252
  },
253
  {
254
  "epoch": 5.5,
255
  "learning_rate": 1.65e-05,
256
- "loss": 0.8995,
257
  "step": 330
258
  },
259
  {
260
  "epoch": 5.67,
261
  "learning_rate": 1.7000000000000003e-05,
262
- "loss": 0.8644,
263
  "step": 340
264
  },
265
  {
266
  "epoch": 5.83,
267
  "learning_rate": 1.75e-05,
268
- "loss": 0.817,
269
  "step": 350
270
  },
271
  {
272
  "epoch": 6.0,
273
  "learning_rate": 1.8e-05,
274
- "loss": 0.8227,
275
  "step": 360
276
  },
277
  {
278
  "epoch": 6.0,
279
- "eval_cer": 0.4400319525276732,
280
- "eval_loss": 0.576172411441803,
281
- "eval_runtime": 6.9824,
282
- "eval_samples_per_second": 68.745,
283
- "eval_steps_per_second": 2.148,
284
- "eval_wer": 0.7032306536438768,
285
  "step": 360
286
  },
287
  {
288
  "epoch": 6.17,
289
  "learning_rate": 1.85e-05,
290
- "loss": 0.8101,
291
  "step": 370
292
  },
293
  {
294
  "epoch": 6.33,
295
  "learning_rate": 1.9e-05,
296
- "loss": 0.7121,
297
  "step": 380
298
  },
299
  {
300
  "epoch": 6.5,
301
  "learning_rate": 1.9500000000000003e-05,
302
- "loss": 0.7463,
303
  "step": 390
304
  },
305
  {
306
  "epoch": 6.67,
307
  "learning_rate": 2e-05,
308
- "loss": 0.6886,
309
  "step": 400
310
  },
311
  {
312
  "epoch": 6.83,
313
  "learning_rate": 2.05e-05,
314
- "loss": 0.6696,
315
  "step": 410
316
  },
317
  {
318
  "epoch": 7.0,
319
  "learning_rate": 2.1e-05,
320
- "loss": 0.6503,
321
  "step": 420
322
  },
323
  {
324
  "epoch": 7.0,
325
- "eval_cer": 0.38879379208033776,
326
- "eval_loss": 0.48376166820526123,
327
- "eval_runtime": 6.9916,
328
- "eval_samples_per_second": 68.653,
329
- "eval_steps_per_second": 2.145,
330
- "eval_wer": 0.6341096919609316,
331
  "step": 420
332
  },
333
  {
334
  "epoch": 7.17,
335
  "learning_rate": 2.15e-05,
336
- "loss": 0.6273,
337
  "step": 430
338
  },
339
  {
340
  "epoch": 7.33,
341
  "learning_rate": 2.2000000000000003e-05,
342
- "loss": 0.5682,
343
  "step": 440
344
  },
345
  {
346
  "epoch": 7.5,
347
  "learning_rate": 2.25e-05,
348
- "loss": 0.6073,
349
  "step": 450
350
  },
351
  {
352
  "epoch": 7.67,
353
  "learning_rate": 2.3000000000000003e-05,
354
- "loss": 0.589,
355
  "step": 460
356
  },
357
  {
358
  "epoch": 7.83,
359
  "learning_rate": 2.35e-05,
360
- "loss": 0.5588,
361
  "step": 470
362
  },
363
  {
364
  "epoch": 8.0,
365
  "learning_rate": 2.4e-05,
366
- "loss": 0.5194,
367
  "step": 480
368
  },
369
  {
370
  "epoch": 8.0,
371
- "eval_cer": 0.3682528814332991,
372
- "eval_loss": 0.3765193819999695,
373
- "eval_runtime": 6.9801,
374
- "eval_samples_per_second": 68.767,
375
- "eval_steps_per_second": 2.149,
376
- "eval_wer": 0.5833959429000751,
377
  "step": 480
378
  },
379
  {
380
  "epoch": 8.17,
381
  "learning_rate": 2.45e-05,
382
- "loss": 0.5705,
383
  "step": 490
384
  },
385
  {
386
  "epoch": 8.33,
387
  "learning_rate": 2.5e-05,
388
- "loss": 0.4612,
389
  "step": 500
390
  },
391
  {
392
  "epoch": 8.5,
393
  "learning_rate": 2.5500000000000003e-05,
394
- "loss": 0.4867,
395
  "step": 510
396
  },
397
  {
398
  "epoch": 8.67,
399
  "learning_rate": 2.6000000000000002e-05,
400
- "loss": 0.4553,
401
  "step": 520
402
  },
403
  {
404
  "epoch": 8.83,
405
  "learning_rate": 2.6500000000000004e-05,
406
- "loss": 0.473,
407
  "step": 530
408
  },
409
  {
410
  "epoch": 9.0,
411
  "learning_rate": 2.7000000000000002e-05,
412
- "loss": 0.4697,
413
  "step": 540
414
  },
415
  {
416
  "epoch": 9.0,
417
- "eval_cer": 0.26954239415725206,
418
- "eval_loss": 0.2800883948802948,
419
- "eval_runtime": 7.0168,
420
- "eval_samples_per_second": 68.407,
421
- "eval_steps_per_second": 2.138,
422
- "eval_wer": 0.44214876033057854,
423
  "step": 540
424
  },
425
  {
426
  "epoch": 9.17,
427
  "learning_rate": 2.7500000000000004e-05,
428
- "loss": 0.3982,
429
  "step": 550
430
  },
431
  {
432
  "epoch": 9.33,
433
  "learning_rate": 2.8000000000000003e-05,
434
- "loss": 0.4396,
435
  "step": 560
436
  },
437
  {
438
  "epoch": 9.5,
439
  "learning_rate": 2.8499999999999998e-05,
440
- "loss": 0.4404,
441
  "step": 570
442
  },
443
  {
444
  "epoch": 9.67,
445
  "learning_rate": 2.9e-05,
446
- "loss": 0.3886,
447
  "step": 580
448
  },
449
  {
450
  "epoch": 9.83,
451
  "learning_rate": 2.95e-05,
452
- "loss": 0.3819,
453
  "step": 590
454
  },
455
  {
456
  "epoch": 10.0,
457
  "learning_rate": 3e-05,
458
- "loss": 0.3755,
459
  "step": 600
460
  },
461
  {
462
  "epoch": 10.0,
463
- "eval_cer": 0.20267031838411503,
464
- "eval_loss": 0.21824190020561218,
465
- "eval_runtime": 7.0196,
466
- "eval_samples_per_second": 68.38,
467
- "eval_steps_per_second": 2.137,
468
- "eval_wer": 0.3628850488354621,
469
  "step": 600
470
  },
471
  {
472
  "epoch": 10.17,
473
  "learning_rate": 3.05e-05,
474
- "loss": 0.3753,
475
  "step": 610
476
  },
477
  {
478
  "epoch": 10.33,
479
  "learning_rate": 3.1e-05,
480
- "loss": 0.3833,
481
  "step": 620
482
  },
483
  {
484
  "epoch": 10.5,
485
  "learning_rate": 3.15e-05,
486
- "loss": 0.3578,
487
  "step": 630
488
  },
489
  {
490
  "epoch": 10.67,
491
  "learning_rate": 3.2000000000000005e-05,
492
- "loss": 0.3521,
493
  "step": 640
494
  },
495
  {
496
  "epoch": 10.83,
497
  "learning_rate": 3.2500000000000004e-05,
498
- "loss": 0.3605,
499
  "step": 650
500
  },
501
  {
502
  "epoch": 11.0,
503
  "learning_rate": 3.3e-05,
504
- "loss": 0.2933,
505
  "step": 660
506
  },
507
  {
508
  "epoch": 11.0,
509
- "eval_cer": 0.20073034348967247,
510
- "eval_loss": 0.16216430068016052,
511
- "eval_runtime": 7.0137,
512
- "eval_samples_per_second": 68.437,
513
- "eval_steps_per_second": 2.139,
514
- "eval_wer": 0.3181818181818182,
515
  "step": 660
516
  },
517
  {
518
  "epoch": 11.17,
519
  "learning_rate": 3.35e-05,
520
- "loss": 0.2853,
521
  "step": 670
522
  },
523
  {
524
  "epoch": 11.33,
525
  "learning_rate": 3.4000000000000007e-05,
526
- "loss": 0.2673,
527
  "step": 680
528
  },
529
  {
530
  "epoch": 11.5,
531
  "learning_rate": 3.45e-05,
532
- "loss": 0.2845,
533
  "step": 690
534
  },
535
  {
536
  "epoch": 11.67,
537
  "learning_rate": 3.5e-05,
538
- "loss": 0.3272,
539
  "step": 700
540
  },
541
  {
542
  "epoch": 11.83,
543
  "learning_rate": 3.55e-05,
544
- "loss": 0.2983,
545
  "step": 710
546
  },
547
  {
548
  "epoch": 12.0,
549
  "learning_rate": 3.6e-05,
550
- "loss": 0.2956,
551
  "step": 720
552
  },
553
  {
554
  "epoch": 12.0,
555
- "eval_cer": 0.1789341549697592,
556
- "eval_loss": 0.10565352439880371,
557
- "eval_runtime": 7.0172,
558
- "eval_samples_per_second": 68.404,
559
- "eval_steps_per_second": 2.138,
560
- "eval_wer": 0.2637114951164538,
561
  "step": 720
562
  },
563
  {
564
  "epoch": 12.17,
565
  "learning_rate": 3.65e-05,
566
- "loss": 0.3274,
567
  "step": 730
568
  },
569
  {
570
  "epoch": 12.33,
571
  "learning_rate": 3.7e-05,
572
- "loss": 0.2384,
573
  "step": 740
574
  },
575
  {
576
  "epoch": 12.5,
577
  "learning_rate": 3.7500000000000003e-05,
578
- "loss": 0.2369,
579
  "step": 750
580
  },
581
  {
582
  "epoch": 12.67,
583
  "learning_rate": 3.8e-05,
584
- "loss": 0.2398,
585
  "step": 760
586
  },
587
  {
588
  "epoch": 12.83,
589
  "learning_rate": 3.85e-05,
590
- "loss": 0.235,
591
  "step": 770
592
  },
593
  {
594
  "epoch": 13.0,
595
  "learning_rate": 3.9000000000000006e-05,
596
- "loss": 0.3003,
597
  "step": 780
598
  },
599
  {
600
  "epoch": 13.0,
601
- "eval_cer": 0.1626155426223896,
602
- "eval_loss": 0.06872875243425369,
603
- "eval_runtime": 6.9787,
604
- "eval_samples_per_second": 68.78,
605
- "eval_steps_per_second": 2.149,
606
- "eval_wer": 0.23929376408715253,
607
  "step": 780
608
  },
609
  {
610
  "epoch": 13.17,
611
  "learning_rate": 3.9500000000000005e-05,
612
- "loss": 0.2271,
613
  "step": 790
614
  },
615
  {
616
  "epoch": 13.33,
617
  "learning_rate": 4e-05,
618
- "loss": 0.197,
619
  "step": 800
620
  },
621
  {
622
  "epoch": 13.5,
623
  "learning_rate": 4.05e-05,
624
- "loss": 0.1845,
625
  "step": 810
626
  },
627
  {
628
  "epoch": 13.67,
629
  "learning_rate": 4.1e-05,
630
- "loss": 0.1497,
631
  "step": 820
632
  },
633
  {
634
  "epoch": 13.83,
635
  "learning_rate": 4.15e-05,
636
- "loss": 0.1805,
637
  "step": 830
638
  },
639
  {
640
  "epoch": 14.0,
641
  "learning_rate": 4.2e-05,
642
- "loss": 0.1756,
643
  "step": 840
644
  },
645
  {
646
  "epoch": 14.0,
647
- "eval_cer": 0.1464110464452813,
648
- "eval_loss": 0.0452243946492672,
649
- "eval_runtime": 6.9904,
650
- "eval_samples_per_second": 68.666,
651
- "eval_steps_per_second": 2.146,
652
- "eval_wer": 0.21074380165289255,
653
  "step": 840
654
  },
655
  {
656
  "epoch": 14.17,
657
  "learning_rate": 4.25e-05,
658
- "loss": 0.1576,
659
  "step": 850
660
  },
661
  {
662
  "epoch": 14.33,
663
  "learning_rate": 4.3e-05,
664
- "loss": 0.1812,
665
  "step": 860
666
  },
667
  {
668
  "epoch": 14.5,
669
  "learning_rate": 4.35e-05,
670
- "loss": 0.1622,
671
  "step": 870
672
  },
673
  {
674
  "epoch": 14.67,
675
  "learning_rate": 4.4000000000000006e-05,
676
- "loss": 0.1823,
677
  "step": 880
678
  },
679
  {
680
  "epoch": 14.83,
681
  "learning_rate": 4.4500000000000004e-05,
682
- "loss": 0.191,
683
  "step": 890
684
  },
685
  {
686
  "epoch": 15.0,
687
  "learning_rate": 4.5e-05,
688
- "loss": 0.1416,
689
  "step": 900
690
  },
691
  {
692
  "epoch": 15.0,
693
- "eval_cer": 0.15040511240442772,
694
- "eval_loss": 0.031135747209191322,
695
- "eval_runtime": 6.9856,
696
- "eval_samples_per_second": 68.713,
697
- "eval_steps_per_second": 2.147,
698
- "eval_wer": 0.20736288504883546,
699
  "step": 900
700
  },
701
  {
702
  "epoch": 15.17,
703
  "learning_rate": 4.55e-05,
704
- "loss": 0.1466,
705
  "step": 910
706
  },
707
  {
708
  "epoch": 15.33,
709
  "learning_rate": 4.600000000000001e-05,
710
- "loss": 0.145,
711
  "step": 920
712
  },
713
  {
714
  "epoch": 15.5,
715
  "learning_rate": 4.6500000000000005e-05,
716
- "loss": 0.1399,
717
  "step": 930
718
  },
719
  {
720
  "epoch": 15.67,
721
  "learning_rate": 4.7e-05,
722
- "loss": 0.1634,
723
  "step": 940
724
  },
725
  {
726
  "epoch": 15.83,
727
  "learning_rate": 4.75e-05,
728
- "loss": 0.1393,
729
  "step": 950
730
  },
731
  {
732
  "epoch": 16.0,
733
  "learning_rate": 4.8e-05,
734
- "loss": 0.1371,
735
  "step": 960
736
  },
737
  {
738
  "epoch": 16.0,
739
- "eval_cer": 0.13477119707862603,
740
- "eval_loss": 0.025044025853276253,
741
- "eval_runtime": 7.0465,
742
- "eval_samples_per_second": 68.119,
743
- "eval_steps_per_second": 2.129,
744
- "eval_wer": 0.20285499624342598,
745
  "step": 960
746
  },
747
  {
748
  "epoch": 16.17,
749
  "learning_rate": 4.85e-05,
750
- "loss": 0.1084,
751
  "step": 970
752
  },
753
  {
754
  "epoch": 16.33,
755
  "learning_rate": 4.9e-05,
756
- "loss": 0.1414,
757
  "step": 980
758
  },
759
  {
760
  "epoch": 16.5,
761
  "learning_rate": 4.9500000000000004e-05,
762
- "loss": 0.1185,
763
  "step": 990
764
  },
765
  {
766
  "epoch": 16.67,
767
  "learning_rate": 5e-05,
768
- "loss": 0.1554,
769
  "step": 1000
770
  },
771
  {
772
  "epoch": 16.83,
773
  "learning_rate": 4.9545454545454553e-05,
774
- "loss": 0.1247,
775
  "step": 1010
776
  },
777
  {
778
  "epoch": 17.0,
779
  "learning_rate": 4.909090909090909e-05,
780
- "loss": 0.1739,
781
  "step": 1020
782
  },
783
  {
784
  "epoch": 17.0,
785
- "eval_cer": 0.14835102133972383,
786
- "eval_loss": 0.02305447682738304,
787
- "eval_runtime": 7.0249,
788
- "eval_samples_per_second": 68.328,
789
- "eval_steps_per_second": 2.135,
790
- "eval_wer": 0.20022539444027046,
791
  "step": 1020
792
  },
793
  {
794
  "epoch": 17.17,
795
  "learning_rate": 4.863636363636364e-05,
796
- "loss": 0.1617,
797
  "step": 1030
798
  },
799
  {
800
  "epoch": 17.33,
801
  "learning_rate": 4.8181818181818186e-05,
802
- "loss": 0.1236,
803
  "step": 1040
804
  },
805
  {
806
  "epoch": 17.5,
807
  "learning_rate": 4.772727272727273e-05,
808
- "loss": 0.108,
809
  "step": 1050
810
  },
811
  {
812
  "epoch": 17.67,
813
  "learning_rate": 4.7272727272727275e-05,
814
- "loss": 0.1326,
815
  "step": 1060
816
  },
817
  {
818
  "epoch": 17.83,
819
  "learning_rate": 4.681818181818182e-05,
820
- "loss": 0.1178,
821
  "step": 1070
822
  },
823
  {
824
  "epoch": 18.0,
825
  "learning_rate": 4.636363636363636e-05,
826
- "loss": 0.1035,
827
  "step": 1080
828
  },
829
  {
830
  "epoch": 18.0,
831
- "eval_cer": 0.14412872304005478,
832
- "eval_loss": 0.01757422275841236,
833
- "eval_runtime": 7.0175,
834
- "eval_samples_per_second": 68.401,
835
- "eval_steps_per_second": 2.138,
836
- "eval_wer": 0.19646882043576258,
837
  "step": 1080
838
  },
839
  {
840
  "epoch": 18.17,
841
  "learning_rate": 4.5909090909090914e-05,
842
- "loss": 0.1795,
843
  "step": 1090
844
  },
845
  {
846
  "epoch": 18.33,
847
  "learning_rate": 4.545454545454546e-05,
848
- "loss": 0.1385,
849
  "step": 1100
850
  },
851
  {
852
  "epoch": 18.5,
853
  "learning_rate": 4.5e-05,
854
- "loss": 0.1245,
855
  "step": 1110
856
  },
857
  {
858
  "epoch": 18.67,
859
  "learning_rate": 4.454545454545455e-05,
860
- "loss": 0.1147,
861
  "step": 1120
862
  },
863
  {
864
  "epoch": 18.83,
865
  "learning_rate": 4.409090909090909e-05,
866
- "loss": 0.088,
867
  "step": 1130
868
  },
869
  {
870
  "epoch": 19.0,
871
  "learning_rate": 4.3636363636363636e-05,
872
- "loss": 0.104,
873
  "step": 1140
874
  },
875
  {
876
  "epoch": 19.0,
877
- "eval_cer": 0.16466963368709345,
878
- "eval_loss": 0.013819140382111073,
879
- "eval_runtime": 6.995,
880
- "eval_samples_per_second": 68.62,
881
- "eval_steps_per_second": 2.144,
882
- "eval_wer": 0.1897069872276484,
883
  "step": 1140
884
  },
885
  {
886
  "epoch": 19.17,
887
  "learning_rate": 4.318181818181819e-05,
888
- "loss": 0.0868,
889
  "step": 1150
890
  },
891
  {
892
  "epoch": 19.33,
893
  "learning_rate": 4.2727272727272724e-05,
894
- "loss": 0.1332,
895
  "step": 1160
896
  },
897
  {
898
  "epoch": 19.5,
899
  "learning_rate": 4.2272727272727275e-05,
900
- "loss": 0.0902,
901
  "step": 1170
902
  },
903
  {
904
  "epoch": 19.67,
905
  "learning_rate": 4.181818181818182e-05,
906
- "loss": 0.0942,
907
  "step": 1180
908
  },
909
  {
910
  "epoch": 19.83,
911
  "learning_rate": 4.1363636363636364e-05,
912
- "loss": 0.1089,
913
  "step": 1190
914
  },
915
  {
916
  "epoch": 20.0,
917
  "learning_rate": 4.0909090909090915e-05,
918
- "loss": 0.1195,
919
  "step": 1200
920
  },
921
  {
922
  "epoch": 20.0,
923
- "eval_cer": 0.1324888736733995,
924
- "eval_loss": 0.01452054362744093,
925
- "eval_runtime": 7.0028,
926
- "eval_samples_per_second": 68.544,
927
- "eval_steps_per_second": 2.142,
928
- "eval_wer": 0.19158527422990232,
929
  "step": 1200
930
  },
931
  {
932
  "epoch": 20.17,
933
  "learning_rate": 4.045454545454546e-05,
934
- "loss": 0.0691,
935
  "step": 1210
936
  },
937
  {
938
  "epoch": 20.33,
939
  "learning_rate": 4e-05,
940
- "loss": 0.0931,
941
  "step": 1220
942
  },
943
  {
944
  "epoch": 20.5,
945
  "learning_rate": 3.954545454545455e-05,
946
- "loss": 0.1074,
947
  "step": 1230
948
  },
949
  {
950
  "epoch": 20.67,
951
  "learning_rate": 3.909090909090909e-05,
952
- "loss": 0.0775,
953
  "step": 1240
954
  },
955
  {
956
  "epoch": 20.83,
957
  "learning_rate": 3.8636363636363636e-05,
958
- "loss": 0.0924,
959
  "step": 1250
960
  },
961
  {
962
  "epoch": 21.0,
963
  "learning_rate": 3.818181818181819e-05,
964
- "loss": 0.0982,
965
  "step": 1260
966
  },
967
  {
968
  "epoch": 21.0,
969
- "eval_cer": 0.13214652516261555,
970
- "eval_loss": 0.010722682811319828,
971
- "eval_runtime": 7.016,
972
- "eval_samples_per_second": 68.415,
973
- "eval_steps_per_second": 2.138,
974
- "eval_wer": 0.1893313298271976,
975
  "step": 1260
976
  },
977
  {
978
  "epoch": 21.17,
979
  "learning_rate": 3.7727272727272725e-05,
980
- "loss": 0.0691,
981
  "step": 1270
982
  },
983
  {
984
  "epoch": 21.33,
985
  "learning_rate": 3.7272727272727276e-05,
986
- "loss": 0.0704,
987
  "step": 1280
988
  },
989
  {
990
  "epoch": 21.5,
991
  "learning_rate": 3.681818181818182e-05,
992
- "loss": 0.0732,
993
  "step": 1290
994
  },
995
  {
996
  "epoch": 21.67,
997
  "learning_rate": 3.6363636363636364e-05,
998
- "loss": 0.0759,
999
  "step": 1300
1000
  },
1001
  {
1002
  "epoch": 21.83,
1003
  "learning_rate": 3.590909090909091e-05,
1004
- "loss": 0.0835,
1005
  "step": 1310
1006
  },
1007
  {
1008
  "epoch": 22.0,
1009
  "learning_rate": 3.545454545454546e-05,
1010
- "loss": 0.0755,
1011
  "step": 1320
1012
  },
1013
  {
1014
  "epoch": 22.0,
1015
- "eval_cer": 0.1292936209060824,
1016
- "eval_loss": 0.009691164828836918,
1017
- "eval_runtime": 7.0198,
1018
- "eval_samples_per_second": 68.378,
1019
- "eval_steps_per_second": 2.137,
1020
- "eval_wer": 0.18707738542449287,
1021
  "step": 1320
1022
  },
1023
  {
1024
  "epoch": 22.17,
1025
  "learning_rate": 3.5e-05,
1026
- "loss": 0.087,
1027
  "step": 1330
1028
  },
1029
  {
1030
  "epoch": 22.33,
1031
  "learning_rate": 3.454545454545455e-05,
1032
- "loss": 0.0778,
1033
  "step": 1340
1034
  },
1035
  {
1036
  "epoch": 22.5,
1037
  "learning_rate": 3.409090909090909e-05,
1038
- "loss": 0.109,
1039
  "step": 1350
1040
  },
1041
  {
1042
  "epoch": 22.67,
1043
  "learning_rate": 3.3636363636363636e-05,
1044
- "loss": 0.0558,
1045
  "step": 1360
1046
  },
1047
  {
1048
  "epoch": 22.83,
1049
  "learning_rate": 3.318181818181819e-05,
1050
- "loss": 0.0814,
1051
  "step": 1370
1052
  },
1053
  {
1054
  "epoch": 23.0,
1055
  "learning_rate": 3.272727272727273e-05,
1056
- "loss": 0.0702,
1057
  "step": 1380
1058
  },
1059
  {
1060
  "epoch": 23.0,
1061
- "eval_cer": 0.14070523793221498,
1062
- "eval_loss": 0.008381030522286892,
1063
- "eval_runtime": 6.9996,
1064
- "eval_samples_per_second": 68.575,
1065
- "eval_steps_per_second": 2.143,
1066
- "eval_wer": 0.1859504132231405,
1067
  "step": 1380
1068
  },
1069
  {
1070
  "epoch": 23.17,
1071
  "learning_rate": 3.2272727272727276e-05,
1072
- "loss": 0.0639,
1073
  "step": 1390
1074
  },
1075
  {
1076
  "epoch": 23.33,
1077
  "learning_rate": 3.181818181818182e-05,
1078
- "loss": 0.0483,
1079
  "step": 1400
1080
  },
1081
  {
1082
  "epoch": 23.5,
1083
  "learning_rate": 3.1363636363636365e-05,
1084
- "loss": 0.0974,
1085
  "step": 1410
1086
  },
1087
  {
1088
  "epoch": 23.67,
1089
  "learning_rate": 3.090909090909091e-05,
1090
- "loss": 0.0759,
1091
  "step": 1420
1092
  },
1093
  {
1094
  "epoch": 23.83,
1095
  "learning_rate": 3.0454545454545456e-05,
1096
- "loss": 0.0523,
1097
  "step": 1430
1098
  },
1099
  {
1100
  "epoch": 24.0,
1101
  "learning_rate": 3e-05,
1102
- "loss": 0.044,
1103
  "step": 1440
1104
  },
1105
  {
1106
  "epoch": 24.0,
1107
- "eval_cer": 0.14173228346456693,
1108
- "eval_loss": 0.006644606590270996,
1109
- "eval_runtime": 6.9898,
1110
- "eval_samples_per_second": 68.672,
1111
- "eval_steps_per_second": 2.146,
1112
- "eval_wer": 0.1825694966190834,
1113
  "step": 1440
1114
  },
1115
  {
1116
  "epoch": 24.17,
1117
  "learning_rate": 2.954545454545455e-05,
1118
- "loss": 0.06,
1119
  "step": 1450
1120
  },
1121
  {
1122
  "epoch": 24.33,
1123
  "learning_rate": 2.909090909090909e-05,
1124
- "loss": 0.0509,
1125
  "step": 1460
1126
  },
1127
  {
1128
  "epoch": 24.5,
1129
  "learning_rate": 2.863636363636364e-05,
1130
- "loss": 0.0659,
1131
  "step": 1470
1132
  },
1133
  {
1134
  "epoch": 24.67,
1135
  "learning_rate": 2.818181818181818e-05,
1136
- "loss": 0.0749,
1137
  "step": 1480
1138
  },
1139
  {
1140
  "epoch": 24.83,
1141
  "learning_rate": 2.772727272727273e-05,
1142
- "loss": 0.0807,
1143
  "step": 1490
1144
  },
1145
  {
1146
  "epoch": 25.0,
1147
  "learning_rate": 2.7272727272727273e-05,
1148
- "loss": 0.0735,
1149
  "step": 1500
1150
  },
1151
  {
1152
  "epoch": 25.0,
1153
- "eval_cer": 0.14184639963482826,
1154
- "eval_loss": 0.007389193866401911,
1155
- "eval_runtime": 7.0211,
1156
- "eval_samples_per_second": 68.365,
1157
- "eval_steps_per_second": 2.136,
1158
- "eval_wer": 0.18482344102178813,
1159
  "step": 1500
1160
  },
1161
  {
1162
  "epoch": 25.17,
1163
  "learning_rate": 2.681818181818182e-05,
1164
- "loss": 0.052,
1165
  "step": 1510
1166
  },
1167
  {
1168
  "epoch": 25.33,
1169
  "learning_rate": 2.636363636363636e-05,
1170
- "loss": 0.0785,
1171
  "step": 1520
1172
  },
1173
  {
1174
  "epoch": 25.5,
1175
  "learning_rate": 2.590909090909091e-05,
1176
- "loss": 0.0448,
1177
  "step": 1530
1178
  },
1179
  {
1180
  "epoch": 25.67,
1181
  "learning_rate": 2.5454545454545454e-05,
1182
- "loss": 0.0588,
1183
  "step": 1540
1184
  },
1185
  {
1186
  "epoch": 25.83,
1187
  "learning_rate": 2.5e-05,
1188
- "loss": 0.0831,
1189
  "step": 1550
1190
  },
1191
  {
1192
  "epoch": 26.0,
1193
  "learning_rate": 2.4545454545454545e-05,
1194
- "loss": 0.0736,
1195
  "step": 1560
1196
  },
1197
  {
1198
  "epoch": 26.0,
1199
- "eval_cer": 0.14184639963482826,
1200
- "eval_loss": 0.006949111353605986,
1201
- "eval_runtime": 7.0451,
1202
- "eval_samples_per_second": 68.133,
1203
- "eval_steps_per_second": 2.129,
1204
- "eval_wer": 0.1859504132231405,
1205
  "step": 1560
1206
  },
1207
  {
1208
  "epoch": 26.17,
1209
  "learning_rate": 2.4090909090909093e-05,
1210
- "loss": 0.0547,
1211
  "step": 1570
1212
  },
1213
  {
1214
  "epoch": 26.33,
1215
  "learning_rate": 2.3636363636363637e-05,
1216
- "loss": 0.0507,
1217
  "step": 1580
1218
  },
1219
  {
1220
  "epoch": 26.5,
1221
  "learning_rate": 2.318181818181818e-05,
1222
- "loss": 0.0708,
1223
  "step": 1590
1224
  },
1225
  {
1226
  "epoch": 26.67,
1227
  "learning_rate": 2.272727272727273e-05,
1228
- "loss": 0.0636,
1229
  "step": 1600
1230
  },
1231
  {
1232
  "epoch": 26.83,
1233
  "learning_rate": 2.2272727272727274e-05,
1234
- "loss": 0.0559,
1235
  "step": 1610
1236
  },
1237
  {
1238
  "epoch": 27.0,
1239
  "learning_rate": 2.1818181818181818e-05,
1240
- "loss": 0.0714,
1241
  "step": 1620
1242
  },
1243
  {
1244
  "epoch": 27.0,
1245
- "eval_cer": 0.1394499600593404,
1246
- "eval_loss": 0.005488261580467224,
1247
- "eval_runtime": 7.0321,
1248
- "eval_samples_per_second": 68.258,
1249
- "eval_steps_per_second": 2.133,
1250
- "eval_wer": 0.18369646882043575,
1251
  "step": 1620
1252
  },
1253
  {
1254
  "epoch": 27.17,
1255
  "learning_rate": 2.1363636363636362e-05,
1256
- "loss": 0.0523,
1257
  "step": 1630
1258
  },
1259
  {
1260
  "epoch": 27.33,
1261
  "learning_rate": 2.090909090909091e-05,
1262
- "loss": 0.0641,
1263
  "step": 1640
1264
  },
1265
  {
1266
  "epoch": 27.5,
1267
  "learning_rate": 2.0454545454545457e-05,
1268
- "loss": 0.0525,
1269
  "step": 1650
1270
  },
1271
  {
1272
  "epoch": 27.67,
1273
  "learning_rate": 2e-05,
1274
- "loss": 0.042,
1275
  "step": 1660
1276
  },
1277
  {
1278
  "epoch": 27.83,
1279
  "learning_rate": 1.9545454545454546e-05,
1280
- "loss": 0.0383,
1281
  "step": 1670
1282
  },
1283
  {
1284
  "epoch": 28.0,
1285
  "learning_rate": 1.9090909090909094e-05,
1286
- "loss": 0.062,
1287
  "step": 1680
1288
  },
1289
  {
1290
  "epoch": 28.0,
1291
- "eval_cer": 0.14412872304005478,
1292
- "eval_loss": 0.0047158109955489635,
1293
- "eval_runtime": 7.0331,
1294
- "eval_samples_per_second": 68.249,
1295
- "eval_steps_per_second": 2.133,
1296
- "eval_wer": 0.18332081141998496,
1297
  "step": 1680
1298
  },
1299
  {
1300
  "epoch": 28.17,
1301
  "learning_rate": 1.8636363636363638e-05,
1302
- "loss": 0.0732,
1303
  "step": 1690
1304
  },
1305
  {
1306
  "epoch": 28.33,
1307
  "learning_rate": 1.8181818181818182e-05,
1308
- "loss": 0.0485,
1309
  "step": 1700
1310
  },
1311
  {
1312
  "epoch": 28.5,
1313
  "learning_rate": 1.772727272727273e-05,
1314
- "loss": 0.0909,
1315
  "step": 1710
1316
  },
1317
  {
1318
  "epoch": 28.67,
1319
  "learning_rate": 1.7272727272727274e-05,
1320
- "loss": 0.0621,
1321
  "step": 1720
1322
  },
1323
  {
1324
  "epoch": 28.83,
1325
  "learning_rate": 1.6818181818181818e-05,
1326
- "loss": 0.0584,
1327
  "step": 1730
1328
  },
1329
  {
1330
  "epoch": 29.0,
1331
  "learning_rate": 1.6363636363636366e-05,
1332
- "loss": 0.0497,
1333
  "step": 1740
1334
  },
1335
  {
1336
  "epoch": 29.0,
1337
- "eval_cer": 0.14412872304005478,
1338
- "eval_loss": 0.004752307198941708,
1339
- "eval_runtime": 7.049,
1340
- "eval_samples_per_second": 68.094,
1341
- "eval_steps_per_second": 2.128,
1342
- "eval_wer": 0.18294515401953418,
1343
  "step": 1740
1344
  },
1345
  {
1346
  "epoch": 29.17,
1347
  "learning_rate": 1.590909090909091e-05,
1348
- "loss": 0.057,
1349
  "step": 1750
1350
  },
1351
  {
1352
  "epoch": 29.33,
1353
  "learning_rate": 1.5454545454545454e-05,
1354
- "loss": 0.053,
1355
  "step": 1760
1356
  },
1357
  {
1358
  "epoch": 29.5,
1359
  "learning_rate": 1.5e-05,
1360
- "loss": 0.0446,
1361
  "step": 1770
1362
  },
1363
  {
1364
  "epoch": 29.67,
1365
  "learning_rate": 1.4545454545454545e-05,
1366
- "loss": 0.0599,
1367
  "step": 1780
1368
  },
1369
  {
1370
  "epoch": 29.83,
1371
  "learning_rate": 1.409090909090909e-05,
1372
- "loss": 0.0412,
1373
  "step": 1790
1374
  },
1375
  {
1376
  "epoch": 30.0,
1377
  "learning_rate": 1.3636363636363637e-05,
1378
- "loss": 0.0482,
1379
  "step": 1800
1380
  },
1381
  {
1382
  "epoch": 30.0,
1383
- "eval_cer": 0.14755220814789455,
1384
- "eval_loss": 0.004521808121353388,
1385
- "eval_runtime": 7.0182,
1386
- "eval_samples_per_second": 68.393,
1387
- "eval_steps_per_second": 2.137,
1388
- "eval_wer": 0.18144252441773104,
1389
  "step": 1800
1390
  },
1391
  {
1392
  "epoch": 30.17,
1393
  "learning_rate": 1.318181818181818e-05,
1394
- "loss": 0.0556,
1395
  "step": 1810
1396
  },
1397
  {
1398
  "epoch": 30.33,
1399
  "learning_rate": 1.2727272727272727e-05,
1400
- "loss": 0.0522,
1401
  "step": 1820
1402
  },
1403
  {
1404
  "epoch": 30.5,
1405
  "learning_rate": 1.2272727272727273e-05,
1406
- "loss": 0.0554,
1407
  "step": 1830
1408
  },
1409
  {
1410
  "epoch": 30.67,
1411
  "learning_rate": 1.1818181818181819e-05,
1412
- "loss": 0.0467,
1413
  "step": 1840
1414
  },
1415
  {
1416
  "epoch": 30.83,
1417
  "learning_rate": 1.1363636363636365e-05,
1418
- "loss": 0.0472,
1419
  "step": 1850
1420
  },
1421
  {
1422
  "epoch": 31.0,
1423
  "learning_rate": 1.0909090909090909e-05,
1424
- "loss": 0.072,
1425
  "step": 1860
1426
  },
1427
  {
1428
  "epoch": 31.0,
1429
- "eval_cer": 0.14869336985050782,
1430
- "eval_loss": 0.004135935567319393,
1431
- "eval_runtime": 7.0119,
1432
- "eval_samples_per_second": 68.455,
1433
- "eval_steps_per_second": 2.139,
1434
- "eval_wer": 0.18181818181818182,
1435
  "step": 1860
1436
  },
1437
  {
1438
  "epoch": 31.17,
1439
  "learning_rate": 1.0454545454545455e-05,
1440
- "loss": 0.0459,
1441
  "step": 1870
1442
  },
1443
  {
1444
  "epoch": 31.33,
1445
  "learning_rate": 1e-05,
1446
- "loss": 0.0491,
1447
  "step": 1880
1448
  },
1449
  {
1450
  "epoch": 31.5,
1451
  "learning_rate": 9.545454545454547e-06,
1452
- "loss": 0.0632,
1453
  "step": 1890
1454
  },
1455
  {
1456
  "epoch": 31.67,
1457
  "learning_rate": 9.090909090909091e-06,
1458
- "loss": 0.0569,
1459
  "step": 1900
1460
  },
1461
  {
1462
  "epoch": 31.83,
1463
  "learning_rate": 8.636363636363637e-06,
1464
- "loss": 0.0597,
1465
  "step": 1910
1466
  },
1467
  {
1468
  "epoch": 32.0,
1469
  "learning_rate": 8.181818181818183e-06,
1470
- "loss": 0.0443,
1471
  "step": 1920
1472
  },
1473
  {
1474
  "epoch": 32.0,
1475
- "eval_cer": 0.14572634942371335,
1476
- "eval_loss": 0.004406373482197523,
1477
- "eval_runtime": 7.044,
1478
- "eval_samples_per_second": 68.143,
1479
- "eval_steps_per_second": 2.129,
1480
- "eval_wer": 0.18106686701728025,
1481
  "step": 1920
1482
  },
1483
  {
1484
  "epoch": 32.17,
1485
  "learning_rate": 7.727272727272727e-06,
1486
- "loss": 0.0549,
1487
  "step": 1930
1488
  },
1489
  {
1490
  "epoch": 32.33,
1491
  "learning_rate": 7.272727272727272e-06,
1492
- "loss": 0.1098,
1493
  "step": 1940
1494
  },
1495
  {
1496
  "epoch": 32.5,
1497
  "learning_rate": 6.818181818181818e-06,
1498
- "loss": 0.0431,
1499
  "step": 1950
1500
  },
1501
  {
1502
  "epoch": 32.67,
1503
  "learning_rate": 6.363636363636363e-06,
1504
- "loss": 0.0349,
1505
  "step": 1960
1506
  },
1507
  {
1508
  "epoch": 32.83,
1509
  "learning_rate": 5.909090909090909e-06,
1510
- "loss": 0.0394,
1511
  "step": 1970
1512
  },
1513
  {
1514
  "epoch": 33.0,
1515
  "learning_rate": 5.4545454545454545e-06,
1516
- "loss": 0.0494,
1517
  "step": 1980
1518
  },
1519
  {
1520
  "epoch": 33.0,
1521
- "eval_cer": 0.1429875613374415,
1522
- "eval_loss": 0.0046097091399133205,
1523
- "eval_runtime": 7.0091,
1524
- "eval_samples_per_second": 68.482,
1525
- "eval_steps_per_second": 2.14,
1526
- "eval_wer": 0.18181818181818182,
1527
  "step": 1980
1528
  },
1529
  {
1530
  "epoch": 33.17,
1531
  "learning_rate": 5e-06,
1532
- "loss": 0.0722,
1533
  "step": 1990
1534
  },
1535
  {
1536
  "epoch": 33.33,
1537
  "learning_rate": 4.5454545454545455e-06,
1538
- "loss": 0.0488,
1539
  "step": 2000
1540
  },
1541
  {
1542
  "epoch": 33.5,
1543
  "learning_rate": 4.0909090909090915e-06,
1544
- "loss": 0.0455,
1545
  "step": 2010
1546
  },
1547
  {
1548
  "epoch": 33.67,
1549
  "learning_rate": 3.636363636363636e-06,
1550
- "loss": 0.0478,
1551
  "step": 2020
1552
  },
1553
  {
1554
  "epoch": 33.83,
1555
  "learning_rate": 3.1818181818181817e-06,
1556
- "loss": 0.0362,
1557
  "step": 2030
1558
  },
1559
  {
1560
  "epoch": 34.0,
1561
  "learning_rate": 2.7272727272727272e-06,
1562
- "loss": 0.053,
1563
  "step": 2040
1564
  },
1565
  {
1566
  "epoch": 34.0,
1567
- "eval_cer": 0.14184639963482826,
1568
- "eval_loss": 0.0043495288118720055,
1569
- "eval_runtime": 7.0333,
1570
- "eval_samples_per_second": 68.247,
1571
- "eval_steps_per_second": 2.133,
1572
- "eval_wer": 0.18144252441773104,
1573
  "step": 2040
1574
  },
1575
  {
1576
  "epoch": 34.17,
1577
  "learning_rate": 2.2727272727272728e-06,
1578
- "loss": 0.0489,
1579
  "step": 2050
1580
  },
1581
  {
1582
  "epoch": 34.33,
1583
  "learning_rate": 1.818181818181818e-06,
1584
- "loss": 0.0543,
1585
  "step": 2060
1586
  },
1587
  {
1588
  "epoch": 34.5,
1589
  "learning_rate": 1.3636363636363636e-06,
1590
- "loss": 0.0535,
1591
  "step": 2070
1592
  },
1593
  {
1594
  "epoch": 34.67,
1595
  "learning_rate": 9.09090909090909e-07,
1596
- "loss": 0.0797,
1597
  "step": 2080
1598
  },
1599
  {
1600
  "epoch": 34.83,
1601
  "learning_rate": 4.545454545454545e-07,
1602
- "loss": 0.0624,
1603
  "step": 2090
1604
  },
1605
  {
1606
  "epoch": 35.0,
1607
  "learning_rate": 0.0,
1608
- "loss": 0.0465,
1609
  "step": 2100
1610
  },
1611
  {
1612
  "epoch": 35.0,
1613
- "eval_cer": 0.14104758644299897,
1614
- "eval_loss": 0.004232622217386961,
1615
- "eval_runtime": 7.0183,
1616
- "eval_samples_per_second": 68.393,
1617
- "eval_steps_per_second": 2.137,
1618
- "eval_wer": 0.18144252441773104,
1619
  "step": 2100
1620
  },
1621
  {
1622
  "epoch": 35.0,
1623
  "step": 2100,
1624
- "total_flos": 5.308766550976189e+18,
1625
- "train_loss": 1.0713055984321094,
1626
- "train_runtime": 4228.7223,
1627
- "train_samples_per_second": 15.891,
1628
  "train_steps_per_second": 0.497
1629
  }
1630
  ],
@@ -1632,7 +1632,7 @@
1632
  "max_steps": 2100,
1633
  "num_train_epochs": 35,
1634
  "save_steps": 500,
1635
- "total_flos": 5.308766550976189e+18,
1636
  "trial_name": null,
1637
  "trial_params": null
1638
  }
 
11
  {
12
  "epoch": 0.17,
13
  "learning_rate": 5.000000000000001e-07,
14
+ "loss": 17.2251,
15
  "step": 10
16
  },
17
  {
18
  "epoch": 0.33,
19
  "learning_rate": 1.0000000000000002e-06,
20
+ "loss": 17.329,
21
  "step": 20
22
  },
23
  {
24
  "epoch": 0.5,
25
  "learning_rate": 1.5e-06,
26
+ "loss": 16.6944,
27
  "step": 30
28
  },
29
  {
30
  "epoch": 0.67,
31
  "learning_rate": 2.0000000000000003e-06,
32
+ "loss": 16.5026,
33
  "step": 40
34
  },
35
  {
36
  "epoch": 0.83,
37
  "learning_rate": 2.5e-06,
38
+ "loss": 15.637,
39
  "step": 50
40
  },
41
  {
42
  "epoch": 1.0,
43
  "learning_rate": 3e-06,
44
+ "loss": 14.5102,
45
  "step": 60
46
  },
47
  {
48
  "epoch": 1.0,
49
+ "eval_cer": 1.2966024915062289,
50
+ "eval_loss": 13.54184341430664,
51
+ "eval_runtime": 7.0367,
52
+ "eval_samples_per_second": 68.214,
53
+ "eval_steps_per_second": 2.132,
54
+ "eval_wer": 1.0011119347664936,
55
  "step": 60
56
  },
57
  {
58
  "epoch": 1.17,
59
  "learning_rate": 3.5000000000000004e-06,
60
+ "loss": 13.6535,
61
  "step": 70
62
  },
63
  {
64
  "epoch": 1.33,
65
  "learning_rate": 4.000000000000001e-06,
66
+ "loss": 11.923,
67
  "step": 80
68
  },
69
  {
70
  "epoch": 1.5,
71
  "learning_rate": 4.5e-06,
72
+ "loss": 10.5358,
73
  "step": 90
74
  },
75
  {
76
  "epoch": 1.67,
77
  "learning_rate": 5e-06,
78
+ "loss": 8.2827,
79
  "step": 100
80
  },
81
  {
82
  "epoch": 1.83,
83
  "learning_rate": 5.500000000000001e-06,
84
+ "loss": 6.8578,
85
  "step": 110
86
  },
87
  {
88
  "epoch": 2.0,
89
  "learning_rate": 6e-06,
90
+ "loss": 6.2989,
91
  "step": 120
92
  },
93
  {
94
  "epoch": 2.0,
95
+ "eval_cer": 0.8736126840317101,
96
+ "eval_loss": 4.612649917602539,
97
+ "eval_runtime": 7.5054,
98
+ "eval_samples_per_second": 63.954,
99
+ "eval_steps_per_second": 1.999,
100
+ "eval_wer": 1.0,
101
  "step": 120
102
  },
103
  {
104
  "epoch": 2.17,
105
  "learning_rate": 6.5000000000000004e-06,
106
+ "loss": 4.6838,
107
  "step": 130
108
  },
109
  {
110
  "epoch": 2.33,
111
  "learning_rate": 7.000000000000001e-06,
112
+ "loss": 4.0318,
113
  "step": 140
114
  },
115
  {
116
  "epoch": 2.5,
117
  "learning_rate": 7.5e-06,
118
+ "loss": 3.2615,
119
  "step": 150
120
  },
121
  {
122
  "epoch": 2.67,
123
  "learning_rate": 8.000000000000001e-06,
124
+ "loss": 2.8846,
125
  "step": 160
126
  },
127
  {
128
  "epoch": 2.83,
129
  "learning_rate": 8.500000000000002e-06,
130
+ "loss": 2.5007,
131
  "step": 170
132
  },
133
  {
134
  "epoch": 3.0,
135
  "learning_rate": 9e-06,
136
+ "loss": 2.208,
137
  "step": 180
138
  },
139
  {
140
  "epoch": 3.0,
141
+ "eval_cer": 0.8159682899207248,
142
+ "eval_loss": 1.7111728191375732,
143
+ "eval_runtime": 7.0161,
144
+ "eval_samples_per_second": 68.414,
145
+ "eval_steps_per_second": 2.138,
146
+ "eval_wer": 0.9992587101556709,
147
  "step": 180
148
  },
149
  {
150
  "epoch": 3.17,
151
  "learning_rate": 9.5e-06,
152
+ "loss": 1.8824,
153
  "step": 190
154
  },
155
  {
156
  "epoch": 3.33,
157
  "learning_rate": 1e-05,
158
+ "loss": 1.6231,
159
  "step": 200
160
  },
161
  {
162
  "epoch": 3.5,
163
  "learning_rate": 1.05e-05,
164
+ "loss": 1.5416,
165
  "step": 210
166
  },
167
  {
168
  "epoch": 3.67,
169
  "learning_rate": 1.1000000000000001e-05,
170
+ "loss": 1.3219,
171
  "step": 220
172
  },
173
  {
174
  "epoch": 3.83,
175
  "learning_rate": 1.1500000000000002e-05,
176
+ "loss": 1.1937,
177
  "step": 230
178
  },
179
  {
180
  "epoch": 4.0,
181
  "learning_rate": 1.2e-05,
182
+ "loss": 1.098,
183
  "step": 240
184
  },
185
  {
186
  "epoch": 4.0,
187
+ "eval_cer": 0.569762174405436,
188
+ "eval_loss": 0.8765236139297485,
189
+ "eval_runtime": 7.0276,
190
+ "eval_samples_per_second": 68.302,
191
+ "eval_steps_per_second": 2.134,
192
+ "eval_wer": 0.8409933283914011,
193
  "step": 240
194
  },
195
  {
196
  "epoch": 4.17,
197
  "learning_rate": 1.25e-05,
198
+ "loss": 1.1276,
199
  "step": 250
200
  },
201
  {
202
  "epoch": 4.33,
203
  "learning_rate": 1.3000000000000001e-05,
204
+ "loss": 0.9733,
205
  "step": 260
206
  },
207
  {
208
  "epoch": 4.5,
209
  "learning_rate": 1.3500000000000001e-05,
210
+ "loss": 0.9508,
211
  "step": 270
212
  },
213
  {
214
  "epoch": 4.67,
215
  "learning_rate": 1.4000000000000001e-05,
216
+ "loss": 0.8988,
217
  "step": 280
218
  },
219
  {
220
  "epoch": 4.83,
221
  "learning_rate": 1.45e-05,
222
+ "loss": 0.8499,
223
  "step": 290
224
  },
225
  {
226
  "epoch": 5.0,
227
  "learning_rate": 1.5e-05,
228
+ "loss": 0.8201,
229
  "step": 300
230
  },
231
  {
232
  "epoch": 5.0,
233
+ "eval_cer": 0.5339750849377124,
234
+ "eval_loss": 0.6964037418365479,
235
+ "eval_runtime": 7.0359,
236
+ "eval_samples_per_second": 68.221,
237
+ "eval_steps_per_second": 2.132,
238
+ "eval_wer": 0.8220904373610082,
239
  "step": 300
240
  },
241
  {
242
  "epoch": 5.17,
243
  "learning_rate": 1.55e-05,
244
+ "loss": 0.7975,
245
  "step": 310
246
  },
247
  {
248
  "epoch": 5.33,
249
  "learning_rate": 1.6000000000000003e-05,
250
+ "loss": 0.7687,
251
  "step": 320
252
  },
253
  {
254
  "epoch": 5.5,
255
  "learning_rate": 1.65e-05,
256
+ "loss": 0.7861,
257
  "step": 330
258
  },
259
  {
260
  "epoch": 5.67,
261
  "learning_rate": 1.7000000000000003e-05,
262
+ "loss": 0.7617,
263
  "step": 340
264
  },
265
  {
266
  "epoch": 5.83,
267
  "learning_rate": 1.75e-05,
268
+ "loss": 0.7434,
269
  "step": 350
270
  },
271
  {
272
  "epoch": 6.0,
273
  "learning_rate": 1.8e-05,
274
+ "loss": 0.7499,
275
  "step": 360
276
  },
277
  {
278
  "epoch": 6.0,
279
+ "eval_cer": 0.5304643261608154,
280
+ "eval_loss": 0.6299313902854919,
281
+ "eval_runtime": 7.057,
282
+ "eval_samples_per_second": 68.018,
283
+ "eval_steps_per_second": 2.126,
284
+ "eval_wer": 0.8217197924388436,
285
  "step": 360
286
  },
287
  {
288
  "epoch": 6.17,
289
  "learning_rate": 1.85e-05,
290
+ "loss": 0.7481,
291
  "step": 370
292
  },
293
  {
294
  "epoch": 6.33,
295
  "learning_rate": 1.9e-05,
296
+ "loss": 0.6974,
297
  "step": 380
298
  },
299
  {
300
  "epoch": 6.5,
301
  "learning_rate": 1.9500000000000003e-05,
302
+ "loss": 0.7176,
303
  "step": 390
304
  },
305
  {
306
  "epoch": 6.67,
307
  "learning_rate": 2e-05,
308
+ "loss": 0.6873,
309
  "step": 400
310
  },
311
  {
312
  "epoch": 6.83,
313
  "learning_rate": 2.05e-05,
314
+ "loss": 0.6699,
315
  "step": 410
316
  },
317
  {
318
  "epoch": 7.0,
319
  "learning_rate": 2.1e-05,
320
+ "loss": 0.6753,
321
  "step": 420
322
  },
323
  {
324
  "epoch": 7.0,
325
+ "eval_cer": 0.4482446206115515,
326
+ "eval_loss": 0.5997506976127625,
327
+ "eval_runtime": 7.0307,
328
+ "eval_samples_per_second": 68.272,
329
+ "eval_steps_per_second": 2.134,
330
+ "eval_wer": 0.7690882134914752,
331
  "step": 420
332
  },
333
  {
334
  "epoch": 7.17,
335
  "learning_rate": 2.15e-05,
336
+ "loss": 0.6553,
337
  "step": 430
338
  },
339
  {
340
  "epoch": 7.33,
341
  "learning_rate": 2.2000000000000003e-05,
342
+ "loss": 0.6329,
343
  "step": 440
344
  },
345
  {
346
  "epoch": 7.5,
347
  "learning_rate": 2.25e-05,
348
+ "loss": 0.6416,
349
  "step": 450
350
  },
351
  {
352
  "epoch": 7.67,
353
  "learning_rate": 2.3000000000000003e-05,
354
+ "loss": 0.6469,
355
  "step": 460
356
  },
357
  {
358
  "epoch": 7.83,
359
  "learning_rate": 2.35e-05,
360
+ "loss": 0.6278,
361
  "step": 470
362
  },
363
  {
364
  "epoch": 8.0,
365
  "learning_rate": 2.4e-05,
366
+ "loss": 0.6003,
367
  "step": 480
368
  },
369
  {
370
  "epoch": 8.0,
371
+ "eval_cer": 0.4563986409966025,
372
+ "eval_loss": 0.5502179861068726,
373
+ "eval_runtime": 7.0474,
374
+ "eval_samples_per_second": 68.11,
375
+ "eval_steps_per_second": 2.128,
376
+ "eval_wer": 0.7394366197183099,
377
  "step": 480
378
  },
379
  {
380
  "epoch": 8.17,
381
  "learning_rate": 2.45e-05,
382
+ "loss": 0.6474,
383
  "step": 490
384
  },
385
  {
386
  "epoch": 8.33,
387
  "learning_rate": 2.5e-05,
388
+ "loss": 0.5815,
389
  "step": 500
390
  },
391
  {
392
  "epoch": 8.5,
393
  "learning_rate": 2.5500000000000003e-05,
394
+ "loss": 0.5954,
395
  "step": 510
396
  },
397
  {
398
  "epoch": 8.67,
399
  "learning_rate": 2.6000000000000002e-05,
400
+ "loss": 0.5791,
401
  "step": 520
402
  },
403
  {
404
  "epoch": 8.83,
405
  "learning_rate": 2.6500000000000004e-05,
406
+ "loss": 0.5805,
407
  "step": 530
408
  },
409
  {
410
  "epoch": 9.0,
411
  "learning_rate": 2.7000000000000002e-05,
412
+ "loss": 0.5732,
413
  "step": 540
414
  },
415
  {
416
  "epoch": 9.0,
417
+ "eval_cer": 0.39060022650056625,
418
+ "eval_loss": 0.5047246217727661,
419
+ "eval_runtime": 7.0721,
420
+ "eval_samples_per_second": 67.872,
421
+ "eval_steps_per_second": 2.121,
422
+ "eval_wer": 0.7097850259451446,
423
  "step": 540
424
  },
425
  {
426
  "epoch": 9.17,
427
  "learning_rate": 2.7500000000000004e-05,
428
+ "loss": 0.5427,
429
  "step": 550
430
  },
431
  {
432
  "epoch": 9.33,
433
  "learning_rate": 2.8000000000000003e-05,
434
+ "loss": 0.5792,
435
  "step": 560
436
  },
437
  {
438
  "epoch": 9.5,
439
  "learning_rate": 2.8499999999999998e-05,
440
+ "loss": 0.5728,
441
  "step": 570
442
  },
443
  {
444
  "epoch": 9.67,
445
  "learning_rate": 2.9e-05,
446
+ "loss": 0.5513,
447
  "step": 580
448
  },
449
  {
450
  "epoch": 9.83,
451
  "learning_rate": 2.95e-05,
452
+ "loss": 0.5349,
453
  "step": 590
454
  },
455
  {
456
  "epoch": 10.0,
457
  "learning_rate": 3e-05,
458
+ "loss": 0.5404,
459
  "step": 600
460
  },
461
  {
462
  "epoch": 10.0,
463
+ "eval_cer": 0.32831257078142695,
464
+ "eval_loss": 0.4693681299686432,
465
+ "eval_runtime": 7.0538,
466
+ "eval_samples_per_second": 68.048,
467
+ "eval_steps_per_second": 2.127,
468
+ "eval_wer": 0.6679021497405485,
469
  "step": 600
470
  },
471
  {
472
  "epoch": 10.17,
473
  "learning_rate": 3.05e-05,
474
+ "loss": 0.5152,
475
  "step": 610
476
  },
477
  {
478
  "epoch": 10.33,
479
  "learning_rate": 3.1e-05,
480
+ "loss": 0.5459,
481
  "step": 620
482
  },
483
  {
484
  "epoch": 10.5,
485
  "learning_rate": 3.15e-05,
486
+ "loss": 0.5283,
487
  "step": 630
488
  },
489
  {
490
  "epoch": 10.67,
491
  "learning_rate": 3.2000000000000005e-05,
492
+ "loss": 0.5099,
493
  "step": 640
494
  },
495
  {
496
  "epoch": 10.83,
497
  "learning_rate": 3.2500000000000004e-05,
498
+ "loss": 0.5053,
499
  "step": 650
500
  },
501
  {
502
  "epoch": 11.0,
503
  "learning_rate": 3.3e-05,
504
+ "loss": 0.4889,
505
  "step": 660
506
  },
507
  {
508
  "epoch": 11.0,
509
+ "eval_cer": 0.30169875424688564,
510
+ "eval_loss": 0.3979022204875946,
511
+ "eval_runtime": 7.0584,
512
+ "eval_samples_per_second": 68.004,
513
+ "eval_steps_per_second": 2.125,
514
+ "eval_wer": 0.6378799110452187,
515
  "step": 660
516
  },
517
  {
518
  "epoch": 11.17,
519
  "learning_rate": 3.35e-05,
520
+ "loss": 0.4605,
521
  "step": 670
522
  },
523
  {
524
  "epoch": 11.33,
525
  "learning_rate": 3.4000000000000007e-05,
526
+ "loss": 0.4497,
527
  "step": 680
528
  },
529
  {
530
  "epoch": 11.5,
531
  "learning_rate": 3.45e-05,
532
+ "loss": 0.4669,
533
  "step": 690
534
  },
535
  {
536
  "epoch": 11.67,
537
  "learning_rate": 3.5e-05,
538
+ "loss": 0.4863,
539
  "step": 700
540
  },
541
  {
542
  "epoch": 11.83,
543
  "learning_rate": 3.55e-05,
544
+ "loss": 0.4751,
545
  "step": 710
546
  },
547
  {
548
  "epoch": 12.0,
549
  "learning_rate": 3.6e-05,
550
+ "loss": 0.4401,
551
  "step": 720
552
  },
553
  {
554
  "epoch": 12.0,
555
+ "eval_cer": 0.27916194790486976,
556
+ "eval_loss": 0.3254798352718353,
557
+ "eval_runtime": 7.0501,
558
+ "eval_samples_per_second": 68.085,
559
+ "eval_steps_per_second": 2.128,
560
+ "eval_wer": 0.5848776871756857,
561
  "step": 720
562
  },
563
  {
564
  "epoch": 12.17,
565
  "learning_rate": 3.65e-05,
566
+ "loss": 0.4469,
567
  "step": 730
568
  },
569
  {
570
  "epoch": 12.33,
571
  "learning_rate": 3.7e-05,
572
+ "loss": 0.4009,
573
  "step": 740
574
  },
575
  {
576
  "epoch": 12.5,
577
  "learning_rate": 3.7500000000000003e-05,
578
+ "loss": 0.3882,
579
  "step": 750
580
  },
581
  {
582
  "epoch": 12.67,
583
  "learning_rate": 3.8e-05,
584
+ "loss": 0.3879,
585
  "step": 760
586
  },
587
  {
588
  "epoch": 12.83,
589
  "learning_rate": 3.85e-05,
590
+ "loss": 0.4118,
591
  "step": 770
592
  },
593
  {
594
  "epoch": 13.0,
595
  "learning_rate": 3.9000000000000006e-05,
596
+ "loss": 0.4295,
597
  "step": 780
598
  },
599
  {
600
  "epoch": 13.0,
601
+ "eval_cer": 0.2772366930917327,
602
+ "eval_loss": 0.2853207290172577,
603
+ "eval_runtime": 7.0684,
604
+ "eval_samples_per_second": 67.908,
605
+ "eval_steps_per_second": 2.122,
606
+ "eval_wer": 0.5044477390659748,
607
  "step": 780
608
  },
609
  {
610
  "epoch": 13.17,
611
  "learning_rate": 3.9500000000000005e-05,
612
+ "loss": 0.3879,
613
  "step": 790
614
  },
615
  {
616
  "epoch": 13.33,
617
  "learning_rate": 4e-05,
618
+ "loss": 0.3465,
619
  "step": 800
620
  },
621
  {
622
  "epoch": 13.5,
623
  "learning_rate": 4.05e-05,
624
+ "loss": 0.3395,
625
  "step": 810
626
  },
627
  {
628
  "epoch": 13.67,
629
  "learning_rate": 4.1e-05,
630
+ "loss": 0.3304,
631
  "step": 820
632
  },
633
  {
634
  "epoch": 13.83,
635
  "learning_rate": 4.15e-05,
636
+ "loss": 0.3393,
637
  "step": 830
638
  },
639
  {
640
  "epoch": 14.0,
641
  "learning_rate": 4.2e-05,
642
+ "loss": 0.3216,
643
  "step": 840
644
  },
645
  {
646
  "epoch": 14.0,
647
+ "eval_cer": 0.2234428086070215,
648
+ "eval_loss": 0.22039471566677094,
649
+ "eval_runtime": 7.0253,
650
+ "eval_samples_per_second": 68.325,
651
+ "eval_steps_per_second": 2.135,
652
+ "eval_wer": 0.45107487027427723,
653
  "step": 840
654
  },
655
  {
656
  "epoch": 14.17,
657
  "learning_rate": 4.25e-05,
658
+ "loss": 0.3149,
659
  "step": 850
660
  },
661
  {
662
  "epoch": 14.33,
663
  "learning_rate": 4.3e-05,
664
+ "loss": 0.3068,
665
  "step": 860
666
  },
667
  {
668
  "epoch": 14.5,
669
  "learning_rate": 4.35e-05,
670
+ "loss": 0.2827,
671
  "step": 870
672
  },
673
  {
674
  "epoch": 14.67,
675
  "learning_rate": 4.4000000000000006e-05,
676
+ "loss": 0.3114,
677
  "step": 880
678
  },
679
  {
680
  "epoch": 14.83,
681
  "learning_rate": 4.4500000000000004e-05,
682
+ "loss": 0.3113,
683
  "step": 890
684
  },
685
  {
686
  "epoch": 15.0,
687
  "learning_rate": 4.5e-05,
688
+ "loss": 0.2583,
689
  "step": 900
690
  },
691
  {
692
  "epoch": 15.0,
693
+ "eval_cer": 0.2130237825594564,
694
+ "eval_loss": 0.14919017255306244,
695
+ "eval_runtime": 7.0711,
696
+ "eval_samples_per_second": 67.882,
697
+ "eval_steps_per_second": 2.121,
698
+ "eval_wer": 0.39288361749444034,
699
  "step": 900
700
  },
701
  {
702
  "epoch": 15.17,
703
  "learning_rate": 4.55e-05,
704
+ "loss": 0.275,
705
  "step": 910
706
  },
707
  {
708
  "epoch": 15.33,
709
  "learning_rate": 4.600000000000001e-05,
710
+ "loss": 0.2363,
711
  "step": 920
712
  },
713
  {
714
  "epoch": 15.5,
715
  "learning_rate": 4.6500000000000005e-05,
716
+ "loss": 0.246,
717
  "step": 930
718
  },
719
  {
720
  "epoch": 15.67,
721
  "learning_rate": 4.7e-05,
722
+ "loss": 0.2328,
723
  "step": 940
724
  },
725
  {
726
  "epoch": 15.83,
727
  "learning_rate": 4.75e-05,
728
+ "loss": 0.2271,
729
  "step": 950
730
  },
731
  {
732
  "epoch": 16.0,
733
  "learning_rate": 4.8e-05,
734
+ "loss": 0.226,
735
  "step": 960
736
  },
737
  {
738
  "epoch": 16.0,
739
+ "eval_cer": 0.16896942242355606,
740
+ "eval_loss": 0.10074901580810547,
741
+ "eval_runtime": 7.0789,
742
+ "eval_samples_per_second": 67.807,
743
+ "eval_steps_per_second": 2.119,
744
+ "eval_wer": 0.28169014084507044,
745
  "step": 960
746
  },
747
  {
748
  "epoch": 16.17,
749
  "learning_rate": 4.85e-05,
750
+ "loss": 0.1883,
751
  "step": 970
752
  },
753
  {
754
  "epoch": 16.33,
755
  "learning_rate": 4.9e-05,
756
+ "loss": 0.1925,
757
  "step": 980
758
  },
759
  {
760
  "epoch": 16.5,
761
  "learning_rate": 4.9500000000000004e-05,
762
+ "loss": 0.2024,
763
  "step": 990
764
  },
765
  {
766
  "epoch": 16.67,
767
  "learning_rate": 5e-05,
768
+ "loss": 0.1988,
769
  "step": 1000
770
  },
771
  {
772
  "epoch": 16.83,
773
  "learning_rate": 4.9545454545454553e-05,
774
+ "loss": 0.1694,
775
  "step": 1010
776
  },
777
  {
778
  "epoch": 17.0,
779
  "learning_rate": 4.909090909090909e-05,
780
+ "loss": 0.2304,
781
  "step": 1020
782
  },
783
  {
784
  "epoch": 17.0,
785
+ "eval_cer": 0.16987542468856173,
786
+ "eval_loss": 0.06942722201347351,
787
+ "eval_runtime": 7.0727,
788
+ "eval_samples_per_second": 67.866,
789
+ "eval_steps_per_second": 2.121,
790
+ "eval_wer": 0.24388435878428466,
791
  "step": 1020
792
  },
793
  {
794
  "epoch": 17.17,
795
  "learning_rate": 4.863636363636364e-05,
796
+ "loss": 0.2023,
797
  "step": 1030
798
  },
799
  {
800
  "epoch": 17.33,
801
  "learning_rate": 4.8181818181818186e-05,
802
+ "loss": 0.1851,
803
  "step": 1040
804
  },
805
  {
806
  "epoch": 17.5,
807
  "learning_rate": 4.772727272727273e-05,
808
+ "loss": 0.1517,
809
  "step": 1050
810
  },
811
  {
812
  "epoch": 17.67,
813
  "learning_rate": 4.7272727272727275e-05,
814
+ "loss": 0.2016,
815
  "step": 1060
816
  },
817
  {
818
  "epoch": 17.83,
819
  "learning_rate": 4.681818181818182e-05,
820
+ "loss": 0.1894,
821
  "step": 1070
822
  },
823
  {
824
  "epoch": 18.0,
825
  "learning_rate": 4.636363636363636e-05,
826
+ "loss": 0.1487,
827
  "step": 1080
828
  },
829
  {
830
  "epoch": 18.0,
831
+ "eval_cer": 0.17576443941109854,
832
+ "eval_loss": 0.047105852514505386,
833
+ "eval_runtime": 7.0679,
834
+ "eval_samples_per_second": 67.913,
835
+ "eval_steps_per_second": 2.122,
836
+ "eval_wer": 0.21423276501111935,
837
  "step": 1080
838
  },
839
  {
840
  "epoch": 18.17,
841
  "learning_rate": 4.5909090909090914e-05,
842
+ "loss": 0.2165,
843
  "step": 1090
844
  },
845
  {
846
  "epoch": 18.33,
847
  "learning_rate": 4.545454545454546e-05,
848
+ "loss": 0.1735,
849
  "step": 1100
850
  },
851
  {
852
  "epoch": 18.5,
853
  "learning_rate": 4.5e-05,
854
+ "loss": 0.1464,
855
  "step": 1110
856
  },
857
  {
858
  "epoch": 18.67,
859
  "learning_rate": 4.454545454545455e-05,
860
+ "loss": 0.162,
861
  "step": 1120
862
  },
863
  {
864
  "epoch": 18.83,
865
  "learning_rate": 4.409090909090909e-05,
866
+ "loss": 0.1301,
867
  "step": 1130
868
  },
869
  {
870
  "epoch": 19.0,
871
  "learning_rate": 4.3636363636363636e-05,
872
+ "loss": 0.1045,
873
  "step": 1140
874
  },
875
  {
876
  "epoch": 19.0,
877
+ "eval_cer": 0.16862967157417894,
878
+ "eval_loss": 0.030526038259267807,
879
+ "eval_runtime": 7.0581,
880
+ "eval_samples_per_second": 68.007,
881
+ "eval_steps_per_second": 2.125,
882
+ "eval_wer": 0.2168272794662713,
883
  "step": 1140
884
  },
885
  {
886
  "epoch": 19.17,
887
  "learning_rate": 4.318181818181819e-05,
888
+ "loss": 0.1027,
889
  "step": 1150
890
  },
891
  {
892
  "epoch": 19.33,
893
  "learning_rate": 4.2727272727272724e-05,
894
+ "loss": 0.1608,
895
  "step": 1160
896
  },
897
  {
898
  "epoch": 19.5,
899
  "learning_rate": 4.2272727272727275e-05,
900
+ "loss": 0.116,
901
  "step": 1170
902
  },
903
  {
904
  "epoch": 19.67,
905
  "learning_rate": 4.181818181818182e-05,
906
+ "loss": 0.119,
907
  "step": 1180
908
  },
909
  {
910
  "epoch": 19.83,
911
  "learning_rate": 4.1363636363636364e-05,
912
+ "loss": 0.1125,
913
  "step": 1190
914
  },
915
  {
916
  "epoch": 20.0,
917
  "learning_rate": 4.0909090909090915e-05,
918
+ "loss": 0.1104,
919
  "step": 1200
920
  },
921
  {
922
  "epoch": 20.0,
923
+ "eval_cer": 0.1625141562853907,
924
+ "eval_loss": 0.02558966353535652,
925
+ "eval_runtime": 7.0898,
926
+ "eval_samples_per_second": 67.703,
927
+ "eval_steps_per_second": 2.116,
928
+ "eval_wer": 0.2071905114899926,
929
  "step": 1200
930
  },
931
  {
932
  "epoch": 20.17,
933
  "learning_rate": 4.045454545454546e-05,
934
+ "loss": 0.0967,
935
  "step": 1210
936
  },
937
  {
938
  "epoch": 20.33,
939
  "learning_rate": 4e-05,
940
+ "loss": 0.1141,
941
  "step": 1220
942
  },
943
  {
944
  "epoch": 20.5,
945
  "learning_rate": 3.954545454545455e-05,
946
+ "loss": 0.1222,
947
  "step": 1230
948
  },
949
  {
950
  "epoch": 20.67,
951
  "learning_rate": 3.909090909090909e-05,
952
+ "loss": 0.0991,
953
  "step": 1240
954
  },
955
  {
956
  "epoch": 20.83,
957
  "learning_rate": 3.8636363636363636e-05,
958
+ "loss": 0.0998,
959
  "step": 1250
960
  },
961
  {
962
  "epoch": 21.0,
963
  "learning_rate": 3.818181818181819e-05,
964
+ "loss": 0.094,
965
  "step": 1260
966
  },
967
  {
968
  "epoch": 21.0,
969
+ "eval_cer": 0.17599093997734994,
970
+ "eval_loss": 0.022575559094548225,
971
+ "eval_runtime": 7.0875,
972
+ "eval_samples_per_second": 67.725,
973
+ "eval_steps_per_second": 2.116,
974
+ "eval_wer": 0.22720533728687917,
975
  "step": 1260
976
  },
977
  {
978
  "epoch": 21.17,
979
  "learning_rate": 3.7727272727272725e-05,
980
+ "loss": 0.0912,
981
  "step": 1270
982
  },
983
  {
984
  "epoch": 21.33,
985
  "learning_rate": 3.7272727272727276e-05,
986
+ "loss": 0.0801,
987
  "step": 1280
988
  },
989
  {
990
  "epoch": 21.5,
991
  "learning_rate": 3.681818181818182e-05,
992
+ "loss": 0.0872,
993
  "step": 1290
994
  },
995
  {
996
  "epoch": 21.67,
997
  "learning_rate": 3.6363636363636364e-05,
998
+ "loss": 0.0801,
999
  "step": 1300
1000
  },
1001
  {
1002
  "epoch": 21.83,
1003
  "learning_rate": 3.590909090909091e-05,
1004
+ "loss": 0.0882,
1005
  "step": 1310
1006
  },
1007
  {
1008
  "epoch": 22.0,
1009
  "learning_rate": 3.545454545454546e-05,
1010
+ "loss": 0.0987,
1011
  "step": 1320
1012
  },
1013
  {
1014
  "epoch": 22.0,
1015
+ "eval_cer": 0.19003397508493772,
1016
+ "eval_loss": 0.012922757305204868,
1017
+ "eval_runtime": 7.0893,
1018
+ "eval_samples_per_second": 67.708,
1019
+ "eval_steps_per_second": 2.116,
1020
+ "eval_wer": 0.20126019273535953,
1021
  "step": 1320
1022
  },
1023
  {
1024
  "epoch": 22.17,
1025
  "learning_rate": 3.5e-05,
1026
+ "loss": 0.0917,
1027
  "step": 1330
1028
  },
1029
  {
1030
  "epoch": 22.33,
1031
  "learning_rate": 3.454545454545455e-05,
1032
+ "loss": 0.0725,
1033
  "step": 1340
1034
  },
1035
  {
1036
  "epoch": 22.5,
1037
  "learning_rate": 3.409090909090909e-05,
1038
+ "loss": 0.102,
1039
  "step": 1350
1040
  },
1041
  {
1042
  "epoch": 22.67,
1043
  "learning_rate": 3.3636363636363636e-05,
1044
+ "loss": 0.0645,
1045
  "step": 1360
1046
  },
1047
  {
1048
  "epoch": 22.83,
1049
  "learning_rate": 3.318181818181819e-05,
1050
+ "loss": 0.069,
1051
  "step": 1370
1052
  },
1053
  {
1054
  "epoch": 23.0,
1055
  "learning_rate": 3.272727272727273e-05,
1056
+ "loss": 0.0753,
1057
  "step": 1380
1058
  },
1059
  {
1060
  "epoch": 23.0,
1061
+ "eval_cer": 0.17859569648924123,
1062
+ "eval_loss": 0.011034397408366203,
1063
+ "eval_runtime": 7.0845,
1064
+ "eval_samples_per_second": 67.754,
1065
+ "eval_steps_per_second": 2.117,
1066
+ "eval_wer": 0.20533728687916974,
1067
  "step": 1380
1068
  },
1069
  {
1070
  "epoch": 23.17,
1071
  "learning_rate": 3.2272727272727276e-05,
1072
+ "loss": 0.067,
1073
  "step": 1390
1074
  },
1075
  {
1076
  "epoch": 23.33,
1077
  "learning_rate": 3.181818181818182e-05,
1078
+ "loss": 0.0618,
1079
  "step": 1400
1080
  },
1081
  {
1082
  "epoch": 23.5,
1083
  "learning_rate": 3.1363636363636365e-05,
1084
+ "loss": 0.0865,
1085
  "step": 1410
1086
  },
1087
  {
1088
  "epoch": 23.67,
1089
  "learning_rate": 3.090909090909091e-05,
1090
+ "loss": 0.0777,
1091
  "step": 1420
1092
  },
1093
  {
1094
  "epoch": 23.83,
1095
  "learning_rate": 3.0454545454545456e-05,
1096
+ "loss": 0.0495,
1097
  "step": 1430
1098
  },
1099
  {
1100
  "epoch": 24.0,
1101
  "learning_rate": 3e-05,
1102
+ "loss": 0.0544,
1103
  "step": 1440
1104
  },
1105
  {
1106
  "epoch": 24.0,
1107
+ "eval_cer": 0.18584371460928653,
1108
+ "eval_loss": 0.009088627062737942,
1109
+ "eval_runtime": 7.0963,
1110
+ "eval_samples_per_second": 67.641,
1111
+ "eval_steps_per_second": 2.114,
1112
+ "eval_wer": 0.19088213491475167,
1113
  "step": 1440
1114
  },
1115
  {
1116
  "epoch": 24.17,
1117
  "learning_rate": 2.954545454545455e-05,
1118
+ "loss": 0.0697,
1119
  "step": 1450
1120
  },
1121
  {
1122
  "epoch": 24.33,
1123
  "learning_rate": 2.909090909090909e-05,
1124
+ "loss": 0.0652,
1125
  "step": 1460
1126
  },
1127
  {
1128
  "epoch": 24.5,
1129
  "learning_rate": 2.863636363636364e-05,
1130
+ "loss": 0.0608,
1131
  "step": 1470
1132
  },
1133
  {
1134
  "epoch": 24.67,
1135
  "learning_rate": 2.818181818181818e-05,
1136
+ "loss": 0.0617,
1137
  "step": 1480
1138
  },
1139
  {
1140
  "epoch": 24.83,
1141
  "learning_rate": 2.772727272727273e-05,
1142
+ "loss": 0.1019,
1143
  "step": 1490
1144
  },
1145
  {
1146
  "epoch": 25.0,
1147
  "learning_rate": 2.7272727272727273e-05,
1148
+ "loss": 0.0684,
1149
  "step": 1500
1150
  },
1151
  {
1152
  "epoch": 25.0,
1153
+ "eval_cer": 0.17281993204983012,
1154
+ "eval_loss": 0.008332420140504837,
1155
+ "eval_runtime": 7.0855,
1156
+ "eval_samples_per_second": 67.744,
1157
+ "eval_steps_per_second": 2.117,
1158
+ "eval_wer": 0.19014084507042253,
1159
  "step": 1500
1160
  },
1161
  {
1162
  "epoch": 25.17,
1163
  "learning_rate": 2.681818181818182e-05,
1164
+ "loss": 0.0579,
1165
  "step": 1510
1166
  },
1167
  {
1168
  "epoch": 25.33,
1169
  "learning_rate": 2.636363636363636e-05,
1170
+ "loss": 0.0733,
1171
  "step": 1520
1172
  },
1173
  {
1174
  "epoch": 25.5,
1175
  "learning_rate": 2.590909090909091e-05,
1176
+ "loss": 0.0514,
1177
  "step": 1530
1178
  },
1179
  {
1180
  "epoch": 25.67,
1181
  "learning_rate": 2.5454545454545454e-05,
1182
+ "loss": 0.0597,
1183
  "step": 1540
1184
  },
1185
  {
1186
  "epoch": 25.83,
1187
  "learning_rate": 2.5e-05,
1188
+ "loss": 0.1024,
1189
  "step": 1550
1190
  },
1191
  {
1192
  "epoch": 26.0,
1193
  "learning_rate": 2.4545454545454545e-05,
1194
+ "loss": 0.0723,
1195
  "step": 1560
1196
  },
1197
  {
1198
  "epoch": 26.0,
1199
+ "eval_cer": 0.18539071347678368,
1200
+ "eval_loss": 0.008348221890628338,
1201
+ "eval_runtime": 7.0631,
1202
+ "eval_samples_per_second": 67.959,
1203
+ "eval_steps_per_second": 2.124,
1204
+ "eval_wer": 0.20274277242401778,
1205
  "step": 1560
1206
  },
1207
  {
1208
  "epoch": 26.17,
1209
  "learning_rate": 2.4090909090909093e-05,
1210
+ "loss": 0.0613,
1211
  "step": 1570
1212
  },
1213
  {
1214
  "epoch": 26.33,
1215
  "learning_rate": 2.3636363636363637e-05,
1216
+ "loss": 0.0478,
1217
  "step": 1580
1218
  },
1219
  {
1220
  "epoch": 26.5,
1221
  "learning_rate": 2.318181818181818e-05,
1222
+ "loss": 0.0807,
1223
  "step": 1590
1224
  },
1225
  {
1226
  "epoch": 26.67,
1227
  "learning_rate": 2.272727272727273e-05,
1228
+ "loss": 0.0645,
1229
  "step": 1600
1230
  },
1231
  {
1232
  "epoch": 26.83,
1233
  "learning_rate": 2.2272727272727274e-05,
1234
+ "loss": 0.0622,
1235
  "step": 1610
1236
  },
1237
  {
1238
  "epoch": 27.0,
1239
  "learning_rate": 2.1818181818181818e-05,
1240
+ "loss": 0.061,
1241
  "step": 1620
1242
  },
1243
  {
1244
  "epoch": 27.0,
1245
+ "eval_cer": 0.17791619479048698,
1246
+ "eval_loss": 0.006116455886512995,
1247
+ "eval_runtime": 7.0609,
1248
+ "eval_samples_per_second": 67.98,
1249
+ "eval_steps_per_second": 2.124,
1250
+ "eval_wer": 0.20200148257968867,
1251
  "step": 1620
1252
  },
1253
  {
1254
  "epoch": 27.17,
1255
  "learning_rate": 2.1363636363636362e-05,
1256
+ "loss": 0.0518,
1257
  "step": 1630
1258
  },
1259
  {
1260
  "epoch": 27.33,
1261
  "learning_rate": 2.090909090909091e-05,
1262
+ "loss": 0.0557,
1263
  "step": 1640
1264
  },
1265
  {
1266
  "epoch": 27.5,
1267
  "learning_rate": 2.0454545454545457e-05,
1268
+ "loss": 0.0494,
1269
  "step": 1650
1270
  },
1271
  {
1272
  "epoch": 27.67,
1273
  "learning_rate": 2e-05,
1274
+ "loss": 0.0396,
1275
  "step": 1660
1276
  },
1277
  {
1278
  "epoch": 27.83,
1279
  "learning_rate": 1.9545454545454546e-05,
1280
+ "loss": 0.0419,
1281
  "step": 1670
1282
  },
1283
  {
1284
  "epoch": 28.0,
1285
  "learning_rate": 1.9090909090909094e-05,
1286
+ "loss": 0.0635,
1287
  "step": 1680
1288
  },
1289
  {
1290
  "epoch": 28.0,
1291
+ "eval_cer": 0.18176670441676104,
1292
+ "eval_loss": 0.005886468570679426,
1293
+ "eval_runtime": 7.1012,
1294
+ "eval_samples_per_second": 67.594,
1295
+ "eval_steps_per_second": 2.112,
1296
+ "eval_wer": 0.19644180874722017,
1297
  "step": 1680
1298
  },
1299
  {
1300
  "epoch": 28.17,
1301
  "learning_rate": 1.8636363636363638e-05,
1302
+ "loss": 0.0981,
1303
  "step": 1690
1304
  },
1305
  {
1306
  "epoch": 28.33,
1307
  "learning_rate": 1.8181818181818182e-05,
1308
+ "loss": 0.0408,
1309
  "step": 1700
1310
  },
1311
  {
1312
  "epoch": 28.5,
1313
  "learning_rate": 1.772727272727273e-05,
1314
+ "loss": 0.0735,
1315
  "step": 1710
1316
  },
1317
  {
1318
  "epoch": 28.67,
1319
  "learning_rate": 1.7272727272727274e-05,
1320
+ "loss": 0.0649,
1321
  "step": 1720
1322
  },
1323
  {
1324
  "epoch": 28.83,
1325
  "learning_rate": 1.6818181818181818e-05,
1326
+ "loss": 0.0522,
1327
  "step": 1730
1328
  },
1329
  {
1330
  "epoch": 29.0,
1331
  "learning_rate": 1.6363636363636366e-05,
1332
+ "loss": 0.0336,
1333
  "step": 1740
1334
  },
1335
  {
1336
  "epoch": 29.0,
1337
+ "eval_cer": 0.15741789354473387,
1338
+ "eval_loss": 0.004836267791688442,
1339
+ "eval_runtime": 7.1076,
1340
+ "eval_samples_per_second": 67.533,
1341
+ "eval_steps_per_second": 2.11,
1342
+ "eval_wer": 0.18865826538176428,
1343
  "step": 1740
1344
  },
1345
  {
1346
  "epoch": 29.17,
1347
  "learning_rate": 1.590909090909091e-05,
1348
+ "loss": 0.0592,
1349
  "step": 1750
1350
  },
1351
  {
1352
  "epoch": 29.33,
1353
  "learning_rate": 1.5454545454545454e-05,
1354
+ "loss": 0.0499,
1355
  "step": 1760
1356
  },
1357
  {
1358
  "epoch": 29.5,
1359
  "learning_rate": 1.5e-05,
1360
+ "loss": 0.0483,
1361
  "step": 1770
1362
  },
1363
  {
1364
  "epoch": 29.67,
1365
  "learning_rate": 1.4545454545454545e-05,
1366
+ "loss": 0.0572,
1367
  "step": 1780
1368
  },
1369
  {
1370
  "epoch": 29.83,
1371
  "learning_rate": 1.409090909090909e-05,
1372
+ "loss": 0.0371,
1373
  "step": 1790
1374
  },
1375
  {
1376
  "epoch": 30.0,
1377
  "learning_rate": 1.3636363636363637e-05,
1378
+ "loss": 0.0455,
1379
  "step": 1800
1380
  },
1381
  {
1382
  "epoch": 30.0,
1383
+ "eval_cer": 0.16942242355605888,
1384
+ "eval_loss": 0.0036398672964423895,
1385
+ "eval_runtime": 7.0873,
1386
+ "eval_samples_per_second": 67.727,
1387
+ "eval_steps_per_second": 2.116,
1388
+ "eval_wer": 0.18421052631578946,
1389
  "step": 1800
1390
  },
1391
  {
1392
  "epoch": 30.17,
1393
  "learning_rate": 1.318181818181818e-05,
1394
+ "loss": 0.0679,
1395
  "step": 1810
1396
  },
1397
  {
1398
  "epoch": 30.33,
1399
  "learning_rate": 1.2727272727272727e-05,
1400
+ "loss": 0.0556,
1401
  "step": 1820
1402
  },
1403
  {
1404
  "epoch": 30.5,
1405
  "learning_rate": 1.2272727272727273e-05,
1406
+ "loss": 0.0555,
1407
  "step": 1830
1408
  },
1409
  {
1410
  "epoch": 30.67,
1411
  "learning_rate": 1.1818181818181819e-05,
1412
+ "loss": 0.0486,
1413
  "step": 1840
1414
  },
1415
  {
1416
  "epoch": 30.83,
1417
  "learning_rate": 1.1363636363636365e-05,
1418
+ "loss": 0.06,
1419
  "step": 1850
1420
  },
1421
  {
1422
  "epoch": 31.0,
1423
  "learning_rate": 1.0909090909090909e-05,
1424
+ "loss": 0.0672,
1425
  "step": 1860
1426
  },
1427
  {
1428
  "epoch": 31.0,
1429
+ "eval_cer": 0.1507361268403171,
1430
+ "eval_loss": 0.00381605327129364,
1431
+ "eval_runtime": 7.0984,
1432
+ "eval_samples_per_second": 67.621,
1433
+ "eval_steps_per_second": 2.113,
1434
+ "eval_wer": 0.18383988139362492,
1435
  "step": 1860
1436
  },
1437
  {
1438
  "epoch": 31.17,
1439
  "learning_rate": 1.0454545454545455e-05,
1440
+ "loss": 0.0414,
1441
  "step": 1870
1442
  },
1443
  {
1444
  "epoch": 31.33,
1445
  "learning_rate": 1e-05,
1446
+ "loss": 0.041,
1447
  "step": 1880
1448
  },
1449
  {
1450
  "epoch": 31.5,
1451
  "learning_rate": 9.545454545454547e-06,
1452
+ "loss": 0.0529,
1453
  "step": 1890
1454
  },
1455
  {
1456
  "epoch": 31.67,
1457
  "learning_rate": 9.090909090909091e-06,
1458
+ "loss": 0.0744,
1459
  "step": 1900
1460
  },
1461
  {
1462
  "epoch": 31.83,
1463
  "learning_rate": 8.636363636363637e-06,
1464
+ "loss": 0.0495,
1465
  "step": 1910
1466
  },
1467
  {
1468
  "epoch": 32.0,
1469
  "learning_rate": 8.181818181818183e-06,
1470
+ "loss": 0.0315,
1471
  "step": 1920
1472
  },
1473
  {
1474
  "epoch": 32.0,
1475
+ "eval_cer": 0.15549263873159683,
1476
+ "eval_loss": 0.003280089935287833,
1477
+ "eval_runtime": 7.1329,
1478
+ "eval_samples_per_second": 67.294,
1479
+ "eval_steps_per_second": 2.103,
1480
+ "eval_wer": 0.18532246108228317,
1481
  "step": 1920
1482
  },
1483
  {
1484
  "epoch": 32.17,
1485
  "learning_rate": 7.727272727272727e-06,
1486
+ "loss": 0.0525,
1487
  "step": 1930
1488
  },
1489
  {
1490
  "epoch": 32.33,
1491
  "learning_rate": 7.272727272727272e-06,
1492
+ "loss": 0.0946,
1493
  "step": 1940
1494
  },
1495
  {
1496
  "epoch": 32.5,
1497
  "learning_rate": 6.818181818181818e-06,
1498
+ "loss": 0.0302,
1499
  "step": 1950
1500
  },
1501
  {
1502
  "epoch": 32.67,
1503
  "learning_rate": 6.363636363636363e-06,
1504
+ "loss": 0.0365,
1505
  "step": 1960
1506
  },
1507
  {
1508
  "epoch": 32.83,
1509
  "learning_rate": 5.909090909090909e-06,
1510
+ "loss": 0.0357,
1511
  "step": 1970
1512
  },
1513
  {
1514
  "epoch": 33.0,
1515
  "learning_rate": 5.4545454545454545e-06,
1516
+ "loss": 0.0466,
1517
  "step": 1980
1518
  },
1519
  {
1520
  "epoch": 33.0,
1521
+ "eval_cer": 0.15685164212910532,
1522
+ "eval_loss": 0.0033369685988873243,
1523
+ "eval_runtime": 7.0736,
1524
+ "eval_samples_per_second": 67.858,
1525
+ "eval_steps_per_second": 2.121,
1526
+ "eval_wer": 0.1827279466271312,
1527
  "step": 1980
1528
  },
1529
  {
1530
  "epoch": 33.17,
1531
  "learning_rate": 5e-06,
1532
+ "loss": 0.0872,
1533
  "step": 1990
1534
  },
1535
  {
1536
  "epoch": 33.33,
1537
  "learning_rate": 4.5454545454545455e-06,
1538
+ "loss": 0.0405,
1539
  "step": 2000
1540
  },
1541
  {
1542
  "epoch": 33.5,
1543
  "learning_rate": 4.0909090909090915e-06,
1544
+ "loss": 0.0443,
1545
  "step": 2010
1546
  },
1547
  {
1548
  "epoch": 33.67,
1549
  "learning_rate": 3.636363636363636e-06,
1550
+ "loss": 0.0367,
1551
  "step": 2020
1552
  },
1553
  {
1554
  "epoch": 33.83,
1555
  "learning_rate": 3.1818181818181817e-06,
1556
+ "loss": 0.0358,
1557
  "step": 2030
1558
  },
1559
  {
1560
  "epoch": 34.0,
1561
  "learning_rate": 2.7272727272727272e-06,
1562
+ "loss": 0.0491,
1563
  "step": 2040
1564
  },
1565
  {
1566
  "epoch": 34.0,
1567
+ "eval_cer": 0.15560588901472253,
1568
+ "eval_loss": 0.0035262603778392076,
1569
+ "eval_runtime": 7.1013,
1570
+ "eval_samples_per_second": 67.593,
1571
+ "eval_steps_per_second": 2.112,
1572
+ "eval_wer": 0.18346923647146035,
1573
  "step": 2040
1574
  },
1575
  {
1576
  "epoch": 34.17,
1577
  "learning_rate": 2.2727272727272728e-06,
1578
+ "loss": 0.0409,
1579
  "step": 2050
1580
  },
1581
  {
1582
  "epoch": 34.33,
1583
  "learning_rate": 1.818181818181818e-06,
1584
+ "loss": 0.0755,
1585
  "step": 2060
1586
  },
1587
  {
1588
  "epoch": 34.5,
1589
  "learning_rate": 1.3636363636363636e-06,
1590
+ "loss": 0.0617,
1591
  "step": 2070
1592
  },
1593
  {
1594
  "epoch": 34.67,
1595
  "learning_rate": 9.09090909090909e-07,
1596
+ "loss": 0.0672,
1597
  "step": 2080
1598
  },
1599
  {
1600
  "epoch": 34.83,
1601
  "learning_rate": 4.545454545454545e-07,
1602
+ "loss": 0.0605,
1603
  "step": 2090
1604
  },
1605
  {
1606
  "epoch": 35.0,
1607
  "learning_rate": 0.0,
1608
+ "loss": 0.0315,
1609
  "step": 2100
1610
  },
1611
  {
1612
  "epoch": 35.0,
1613
+ "eval_cer": 0.15571913929784825,
1614
+ "eval_loss": 0.003189487848430872,
1615
+ "eval_runtime": 7.1308,
1616
+ "eval_samples_per_second": 67.314,
1617
+ "eval_steps_per_second": 2.104,
1618
+ "eval_wer": 0.18346923647146035,
1619
  "step": 2100
1620
  },
1621
  {
1622
  "epoch": 35.0,
1623
  "step": 2100,
1624
+ "total_flos": 5.305190022011399e+18,
1625
+ "train_loss": 1.105300986397834,
1626
+ "train_runtime": 4221.7996,
1627
+ "train_samples_per_second": 15.917,
1628
  "train_steps_per_second": 0.497
1629
  }
1630
  ],
 
1632
  "max_steps": 2100,
1633
  "num_train_epochs": 35,
1634
  "save_steps": 500,
1635
+ "total_flos": 5.305190022011399e+18,
1636
  "trial_name": null,
1637
  "trial_params": null
1638
  }