cahya commited on
Commit
5069c7b
1 Parent(s): 64ad055

End of training

Browse files
Files changed (4) hide show
  1. all_results.json +14 -0
  2. eval_results.json +9 -0
  3. train_results.json +8 -0
  4. trainer_state.json +610 -0
all_results.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 9.98,
3
+ "eval_loss": 8.827937126159668,
4
+ "eval_runtime": 223.6666,
5
+ "eval_samples": 4276,
6
+ "eval_samples_per_second": 19.118,
7
+ "eval_steps_per_second": 9.559,
8
+ "eval_wer": 1.0123093040005238,
9
+ "train_loss": 8.274780469063002,
10
+ "train_runtime": 14935.5952,
11
+ "train_samples": 10175,
12
+ "train_samples_per_second": 6.813,
13
+ "train_steps_per_second": 0.026
14
+ }
eval_results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 9.98,
3
+ "eval_loss": 8.827937126159668,
4
+ "eval_runtime": 223.6666,
5
+ "eval_samples": 4276,
6
+ "eval_samples_per_second": 19.118,
7
+ "eval_steps_per_second": 9.559,
8
+ "eval_wer": 1.0123093040005238
9
+ }
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 9.98,
3
+ "train_loss": 8.274780469063002,
4
+ "train_runtime": 14935.5952,
5
+ "train_samples": 10175,
6
+ "train_samples_per_second": 6.813,
7
+ "train_steps_per_second": 0.026
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,610 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 9.981132075471699,
5
+ "global_step": 390,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.25,
12
+ "learning_rate": 1e-08,
13
+ "loss": 8.1684,
14
+ "step": 10
15
+ },
16
+ {
17
+ "epoch": 0.25,
18
+ "eval_loss": 8.85881519317627,
19
+ "eval_runtime": 219.4921,
20
+ "eval_samples_per_second": 19.481,
21
+ "eval_steps_per_second": 9.741,
22
+ "eval_wer": 1.0125384665750017,
23
+ "step": 10
24
+ },
25
+ {
26
+ "epoch": 0.5,
27
+ "learning_rate": 9.736842105263159e-09,
28
+ "loss": 8.1428,
29
+ "step": 20
30
+ },
31
+ {
32
+ "epoch": 0.5,
33
+ "eval_loss": 8.856884956359863,
34
+ "eval_runtime": 224.2813,
35
+ "eval_samples_per_second": 19.065,
36
+ "eval_steps_per_second": 9.533,
37
+ "eval_wer": 1.012505729064362,
38
+ "step": 20
39
+ },
40
+ {
41
+ "epoch": 0.75,
42
+ "learning_rate": 9.473684210526316e-09,
43
+ "loss": 8.1333,
44
+ "step": 30
45
+ },
46
+ {
47
+ "epoch": 0.75,
48
+ "eval_loss": 8.855155944824219,
49
+ "eval_runtime": 226.0385,
50
+ "eval_samples_per_second": 18.917,
51
+ "eval_steps_per_second": 9.459,
52
+ "eval_wer": 1.0124075165324429,
53
+ "step": 30
54
+ },
55
+ {
56
+ "epoch": 1.03,
57
+ "learning_rate": 9.210526315789473e-09,
58
+ "loss": 8.7873,
59
+ "step": 40
60
+ },
61
+ {
62
+ "epoch": 1.03,
63
+ "eval_loss": 8.85318660736084,
64
+ "eval_runtime": 220.4335,
65
+ "eval_samples_per_second": 19.398,
66
+ "eval_steps_per_second": 9.699,
67
+ "eval_wer": 1.0124075165324429,
68
+ "step": 40
69
+ },
70
+ {
71
+ "epoch": 1.28,
72
+ "learning_rate": 8.947368421052632e-09,
73
+ "loss": 8.1298,
74
+ "step": 50
75
+ },
76
+ {
77
+ "epoch": 1.28,
78
+ "eval_loss": 8.851649284362793,
79
+ "eval_runtime": 224.7965,
80
+ "eval_samples_per_second": 19.022,
81
+ "eval_steps_per_second": 9.511,
82
+ "eval_wer": 1.0124075165324429,
83
+ "step": 50
84
+ },
85
+ {
86
+ "epoch": 1.53,
87
+ "learning_rate": 8.68421052631579e-09,
88
+ "loss": 8.1445,
89
+ "step": 60
90
+ },
91
+ {
92
+ "epoch": 1.53,
93
+ "eval_loss": 8.84989070892334,
94
+ "eval_runtime": 220.475,
95
+ "eval_samples_per_second": 19.394,
96
+ "eval_steps_per_second": 9.697,
97
+ "eval_wer": 1.0123420415111635,
98
+ "step": 60
99
+ },
100
+ {
101
+ "epoch": 1.78,
102
+ "learning_rate": 8.421052631578947e-09,
103
+ "loss": 8.1635,
104
+ "step": 70
105
+ },
106
+ {
107
+ "epoch": 1.78,
108
+ "eval_loss": 8.8483304977417,
109
+ "eval_runtime": 222.7151,
110
+ "eval_samples_per_second": 19.199,
111
+ "eval_steps_per_second": 9.6,
112
+ "eval_wer": 1.0124075165324429,
113
+ "step": 70
114
+ },
115
+ {
116
+ "epoch": 2.05,
117
+ "learning_rate": 8.157894736842106e-09,
118
+ "loss": 8.7587,
119
+ "step": 80
120
+ },
121
+ {
122
+ "epoch": 2.05,
123
+ "eval_loss": 8.846796989440918,
124
+ "eval_runtime": 221.1122,
125
+ "eval_samples_per_second": 19.339,
126
+ "eval_steps_per_second": 9.669,
127
+ "eval_wer": 1.0125384665750017,
128
+ "step": 80
129
+ },
130
+ {
131
+ "epoch": 2.3,
132
+ "learning_rate": 7.894736842105263e-09,
133
+ "loss": 8.1424,
134
+ "step": 90
135
+ },
136
+ {
137
+ "epoch": 2.3,
138
+ "eval_loss": 8.845438957214355,
139
+ "eval_runtime": 219.7737,
140
+ "eval_samples_per_second": 19.456,
141
+ "eval_steps_per_second": 9.728,
142
+ "eval_wer": 1.0124075165324429,
143
+ "step": 90
144
+ },
145
+ {
146
+ "epoch": 2.55,
147
+ "learning_rate": 7.631578947368422e-09,
148
+ "loss": 8.1318,
149
+ "step": 100
150
+ },
151
+ {
152
+ "epoch": 2.55,
153
+ "eval_loss": 8.844048500061035,
154
+ "eval_runtime": 222.4326,
155
+ "eval_samples_per_second": 19.224,
156
+ "eval_steps_per_second": 9.612,
157
+ "eval_wer": 1.0124402540430826,
158
+ "step": 100
159
+ },
160
+ {
161
+ "epoch": 2.81,
162
+ "learning_rate": 7.368421052631579e-09,
163
+ "loss": 8.1469,
164
+ "step": 110
165
+ },
166
+ {
167
+ "epoch": 2.81,
168
+ "eval_loss": 8.842790603637695,
169
+ "eval_runtime": 224.1044,
170
+ "eval_samples_per_second": 19.08,
171
+ "eval_steps_per_second": 9.54,
172
+ "eval_wer": 1.012505729064362,
173
+ "step": 110
174
+ },
175
+ {
176
+ "epoch": 3.08,
177
+ "learning_rate": 7.105263157894737e-09,
178
+ "loss": 8.7602,
179
+ "step": 120
180
+ },
181
+ {
182
+ "epoch": 3.08,
183
+ "eval_loss": 8.841601371765137,
184
+ "eval_runtime": 222.7003,
185
+ "eval_samples_per_second": 19.201,
186
+ "eval_steps_per_second": 9.6,
187
+ "eval_wer": 1.0124729915537223,
188
+ "step": 120
189
+ },
190
+ {
191
+ "epoch": 3.33,
192
+ "learning_rate": 6.842105263157895e-09,
193
+ "loss": 8.1584,
194
+ "step": 130
195
+ },
196
+ {
197
+ "epoch": 3.33,
198
+ "eval_loss": 8.840473175048828,
199
+ "eval_runtime": 220.9442,
200
+ "eval_samples_per_second": 19.353,
201
+ "eval_steps_per_second": 9.677,
202
+ "eval_wer": 1.0125712040856414,
203
+ "step": 130
204
+ },
205
+ {
206
+ "epoch": 3.58,
207
+ "learning_rate": 6.578947368421054e-09,
208
+ "loss": 8.142,
209
+ "step": 140
210
+ },
211
+ {
212
+ "epoch": 3.58,
213
+ "eval_loss": 8.839417457580566,
214
+ "eval_runtime": 223.4762,
215
+ "eval_samples_per_second": 19.134,
216
+ "eval_steps_per_second": 9.567,
217
+ "eval_wer": 1.0125712040856414,
218
+ "step": 140
219
+ },
220
+ {
221
+ "epoch": 3.83,
222
+ "learning_rate": 6.31578947368421e-09,
223
+ "loss": 8.1285,
224
+ "step": 150
225
+ },
226
+ {
227
+ "epoch": 3.83,
228
+ "eval_loss": 8.838351249694824,
229
+ "eval_runtime": 225.8637,
230
+ "eval_samples_per_second": 18.932,
231
+ "eval_steps_per_second": 9.466,
232
+ "eval_wer": 1.0124075165324429,
233
+ "step": 150
234
+ },
235
+ {
236
+ "epoch": 4.1,
237
+ "learning_rate": 6.052631578947369e-09,
238
+ "loss": 8.7756,
239
+ "step": 160
240
+ },
241
+ {
242
+ "epoch": 4.1,
243
+ "eval_loss": 8.837142944335938,
244
+ "eval_runtime": 224.8527,
245
+ "eval_samples_per_second": 19.017,
246
+ "eval_steps_per_second": 9.508,
247
+ "eval_wer": 1.0124075165324429,
248
+ "step": 160
249
+ },
250
+ {
251
+ "epoch": 4.35,
252
+ "learning_rate": 5.789473684210527e-09,
253
+ "loss": 8.0991,
254
+ "step": 170
255
+ },
256
+ {
257
+ "epoch": 4.35,
258
+ "eval_loss": 8.83634090423584,
259
+ "eval_runtime": 220.3123,
260
+ "eval_samples_per_second": 19.409,
261
+ "eval_steps_per_second": 9.704,
262
+ "eval_wer": 1.0124729915537223,
263
+ "step": 170
264
+ },
265
+ {
266
+ "epoch": 4.6,
267
+ "learning_rate": 5.526315789473685e-09,
268
+ "loss": 8.1442,
269
+ "step": 180
270
+ },
271
+ {
272
+ "epoch": 4.6,
273
+ "eval_loss": 8.83536434173584,
274
+ "eval_runtime": 224.5432,
275
+ "eval_samples_per_second": 19.043,
276
+ "eval_steps_per_second": 9.522,
277
+ "eval_wer": 1.0124402540430826,
278
+ "step": 180
279
+ },
280
+ {
281
+ "epoch": 4.86,
282
+ "learning_rate": 5.263157894736842e-09,
283
+ "loss": 8.1294,
284
+ "step": 190
285
+ },
286
+ {
287
+ "epoch": 4.86,
288
+ "eval_loss": 8.834578514099121,
289
+ "eval_runtime": 220.0402,
290
+ "eval_samples_per_second": 19.433,
291
+ "eval_steps_per_second": 9.716,
292
+ "eval_wer": 1.0124075165324429,
293
+ "step": 190
294
+ },
295
+ {
296
+ "epoch": 5.13,
297
+ "learning_rate": 5e-09,
298
+ "loss": 8.7276,
299
+ "step": 200
300
+ },
301
+ {
302
+ "epoch": 5.13,
303
+ "eval_loss": 8.833772659301758,
304
+ "eval_runtime": 224.0823,
305
+ "eval_samples_per_second": 19.082,
306
+ "eval_steps_per_second": 9.541,
307
+ "eval_wer": 1.0125384665750017,
308
+ "step": 200
309
+ },
310
+ {
311
+ "epoch": 5.38,
312
+ "learning_rate": 4.736842105263158e-09,
313
+ "loss": 8.1439,
314
+ "step": 210
315
+ },
316
+ {
317
+ "epoch": 5.38,
318
+ "eval_loss": 8.832892417907715,
319
+ "eval_runtime": 220.6908,
320
+ "eval_samples_per_second": 19.376,
321
+ "eval_steps_per_second": 9.688,
322
+ "eval_wer": 1.0124402540430826,
323
+ "step": 210
324
+ },
325
+ {
326
+ "epoch": 5.63,
327
+ "learning_rate": 4.473684210526316e-09,
328
+ "loss": 8.1115,
329
+ "step": 220
330
+ },
331
+ {
332
+ "epoch": 5.63,
333
+ "eval_loss": 8.832157135009766,
334
+ "eval_runtime": 221.8649,
335
+ "eval_samples_per_second": 19.273,
336
+ "eval_steps_per_second": 9.636,
337
+ "eval_wer": 1.0124402540430826,
338
+ "step": 220
339
+ },
340
+ {
341
+ "epoch": 5.88,
342
+ "learning_rate": 4.210526315789473e-09,
343
+ "loss": 8.1501,
344
+ "step": 230
345
+ },
346
+ {
347
+ "epoch": 5.88,
348
+ "eval_loss": 8.831602096557617,
349
+ "eval_runtime": 223.55,
350
+ "eval_samples_per_second": 19.128,
351
+ "eval_steps_per_second": 9.564,
352
+ "eval_wer": 1.0125384665750017,
353
+ "step": 230
354
+ },
355
+ {
356
+ "epoch": 6.15,
357
+ "learning_rate": 3.947368421052631e-09,
358
+ "loss": 8.7143,
359
+ "step": 240
360
+ },
361
+ {
362
+ "epoch": 6.15,
363
+ "eval_loss": 8.830825805664062,
364
+ "eval_runtime": 224.3279,
365
+ "eval_samples_per_second": 19.061,
366
+ "eval_steps_per_second": 9.531,
367
+ "eval_wer": 1.0124075165324429,
368
+ "step": 240
369
+ },
370
+ {
371
+ "epoch": 6.4,
372
+ "learning_rate": 3.6842105263157894e-09,
373
+ "loss": 8.143,
374
+ "step": 250
375
+ },
376
+ {
377
+ "epoch": 6.4,
378
+ "eval_loss": 8.830228805541992,
379
+ "eval_runtime": 225.4738,
380
+ "eval_samples_per_second": 18.965,
381
+ "eval_steps_per_second": 9.482,
382
+ "eval_wer": 1.0123747790218032,
383
+ "step": 250
384
+ },
385
+ {
386
+ "epoch": 6.65,
387
+ "learning_rate": 3.4210526315789474e-09,
388
+ "loss": 8.1528,
389
+ "step": 260
390
+ },
391
+ {
392
+ "epoch": 6.65,
393
+ "eval_loss": 8.829960823059082,
394
+ "eval_runtime": 222.8802,
395
+ "eval_samples_per_second": 19.185,
396
+ "eval_steps_per_second": 9.593,
397
+ "eval_wer": 1.0124729915537223,
398
+ "step": 260
399
+ },
400
+ {
401
+ "epoch": 6.91,
402
+ "learning_rate": 3.1842105263157894e-09,
403
+ "loss": 8.1293,
404
+ "step": 270
405
+ },
406
+ {
407
+ "epoch": 6.91,
408
+ "eval_loss": 8.829716682434082,
409
+ "eval_runtime": 223.7307,
410
+ "eval_samples_per_second": 19.112,
411
+ "eval_steps_per_second": 9.556,
412
+ "eval_wer": 1.0124075165324429,
413
+ "step": 270
414
+ },
415
+ {
416
+ "epoch": 7.18,
417
+ "learning_rate": 2.9210526315789475e-09,
418
+ "loss": 8.7519,
419
+ "step": 280
420
+ },
421
+ {
422
+ "epoch": 7.18,
423
+ "eval_loss": 8.829301834106445,
424
+ "eval_runtime": 223.0404,
425
+ "eval_samples_per_second": 19.171,
426
+ "eval_steps_per_second": 9.586,
427
+ "eval_wer": 1.0124729915537223,
428
+ "step": 280
429
+ },
430
+ {
431
+ "epoch": 7.43,
432
+ "learning_rate": 2.657894736842105e-09,
433
+ "loss": 8.1153,
434
+ "step": 290
435
+ },
436
+ {
437
+ "epoch": 7.43,
438
+ "eval_loss": 8.828947067260742,
439
+ "eval_runtime": 219.8129,
440
+ "eval_samples_per_second": 19.453,
441
+ "eval_steps_per_second": 9.726,
442
+ "eval_wer": 1.0124075165324429,
443
+ "step": 290
444
+ },
445
+ {
446
+ "epoch": 7.68,
447
+ "learning_rate": 2.394736842105263e-09,
448
+ "loss": 8.1292,
449
+ "step": 300
450
+ },
451
+ {
452
+ "epoch": 7.68,
453
+ "eval_loss": 8.828753471374512,
454
+ "eval_runtime": 222.9513,
455
+ "eval_samples_per_second": 19.179,
456
+ "eval_steps_per_second": 9.59,
457
+ "eval_wer": 1.0124402540430826,
458
+ "step": 300
459
+ },
460
+ {
461
+ "epoch": 7.93,
462
+ "learning_rate": 2.131578947368421e-09,
463
+ "loss": 8.0904,
464
+ "step": 310
465
+ },
466
+ {
467
+ "epoch": 7.93,
468
+ "eval_loss": 8.828449249267578,
469
+ "eval_runtime": 224.0134,
470
+ "eval_samples_per_second": 19.088,
471
+ "eval_steps_per_second": 9.544,
472
+ "eval_wer": 1.0124075165324429,
473
+ "step": 310
474
+ },
475
+ {
476
+ "epoch": 8.2,
477
+ "learning_rate": 1.868421052631579e-09,
478
+ "loss": 8.7425,
479
+ "step": 320
480
+ },
481
+ {
482
+ "epoch": 8.2,
483
+ "eval_loss": 8.828290939331055,
484
+ "eval_runtime": 219.9475,
485
+ "eval_samples_per_second": 19.441,
486
+ "eval_steps_per_second": 9.721,
487
+ "eval_wer": 1.0125384665750017,
488
+ "step": 320
489
+ },
490
+ {
491
+ "epoch": 8.45,
492
+ "learning_rate": 1.605263157894737e-09,
493
+ "loss": 8.0963,
494
+ "step": 330
495
+ },
496
+ {
497
+ "epoch": 8.45,
498
+ "eval_loss": 8.828081130981445,
499
+ "eval_runtime": 222.5212,
500
+ "eval_samples_per_second": 19.216,
501
+ "eval_steps_per_second": 9.608,
502
+ "eval_wer": 1.0124075165324429,
503
+ "step": 330
504
+ },
505
+ {
506
+ "epoch": 8.7,
507
+ "learning_rate": 1.3421052631578948e-09,
508
+ "loss": 8.1112,
509
+ "step": 340
510
+ },
511
+ {
512
+ "epoch": 8.7,
513
+ "eval_loss": 8.828051567077637,
514
+ "eval_runtime": 222.696,
515
+ "eval_samples_per_second": 19.201,
516
+ "eval_steps_per_second": 9.601,
517
+ "eval_wer": 1.0124402540430826,
518
+ "step": 340
519
+ },
520
+ {
521
+ "epoch": 8.96,
522
+ "learning_rate": 1.0789473684210528e-09,
523
+ "loss": 8.124,
524
+ "step": 350
525
+ },
526
+ {
527
+ "epoch": 8.96,
528
+ "eval_loss": 8.828123092651367,
529
+ "eval_runtime": 222.2122,
530
+ "eval_samples_per_second": 19.243,
531
+ "eval_steps_per_second": 9.621,
532
+ "eval_wer": 1.012505729064362,
533
+ "step": 350
534
+ },
535
+ {
536
+ "epoch": 9.23,
537
+ "learning_rate": 8.157894736842106e-10,
538
+ "loss": 8.7327,
539
+ "step": 360
540
+ },
541
+ {
542
+ "epoch": 9.23,
543
+ "eval_loss": 8.827865600585938,
544
+ "eval_runtime": 222.5189,
545
+ "eval_samples_per_second": 19.216,
546
+ "eval_steps_per_second": 9.608,
547
+ "eval_wer": 1.0123420415111635,
548
+ "step": 360
549
+ },
550
+ {
551
+ "epoch": 9.48,
552
+ "learning_rate": 5.526315789473684e-10,
553
+ "loss": 8.1261,
554
+ "step": 370
555
+ },
556
+ {
557
+ "epoch": 9.48,
558
+ "eval_loss": 8.827857971191406,
559
+ "eval_runtime": 224.6034,
560
+ "eval_samples_per_second": 19.038,
561
+ "eval_steps_per_second": 9.519,
562
+ "eval_wer": 1.012603941596281,
563
+ "step": 370
564
+ },
565
+ {
566
+ "epoch": 9.73,
567
+ "learning_rate": 2.894736842105263e-10,
568
+ "loss": 8.1259,
569
+ "step": 380
570
+ },
571
+ {
572
+ "epoch": 9.73,
573
+ "eval_loss": 8.827925682067871,
574
+ "eval_runtime": 223.0189,
575
+ "eval_samples_per_second": 19.173,
576
+ "eval_steps_per_second": 9.587,
577
+ "eval_wer": 1.0123747790218032,
578
+ "step": 380
579
+ },
580
+ {
581
+ "epoch": 9.98,
582
+ "learning_rate": 2.631578947368421e-11,
583
+ "loss": 8.1116,
584
+ "step": 390
585
+ },
586
+ {
587
+ "epoch": 9.98,
588
+ "eval_loss": 8.827937126159668,
589
+ "eval_runtime": 224.7494,
590
+ "eval_samples_per_second": 19.026,
591
+ "eval_steps_per_second": 9.513,
592
+ "eval_wer": 1.0123093040005238,
593
+ "step": 390
594
+ },
595
+ {
596
+ "epoch": 9.98,
597
+ "step": 390,
598
+ "total_flos": 1.7181016563618468e+19,
599
+ "train_loss": 8.274780469063002,
600
+ "train_runtime": 14935.5952,
601
+ "train_samples_per_second": 6.813,
602
+ "train_steps_per_second": 0.026
603
+ }
604
+ ],
605
+ "max_steps": 390,
606
+ "num_train_epochs": 10,
607
+ "total_flos": 1.7181016563618468e+19,
608
+ "trial_name": null,
609
+ "trial_params": null
610
+ }