File size: 16,764 Bytes
5de6049
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 17.770034843205575,
  "global_step": 5100,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.89,
      "learning_rate": 2.4902343750000002e-05,
      "loss": 3.791,
      "step": 255
    },
    {
      "epoch": 0.89,
      "eval_average_rogue": 0.2125,
      "eval_loss": 3.328416347503662,
      "eval_rouge1_fmeasure": 0.3288,
      "eval_rouge1_precision": 0.3115,
      "eval_rouge1_recall": 0.3637,
      "eval_rouge2_fmeasure": 0.06,
      "eval_rouge2_precision": 0.0569,
      "eval_rouge2_recall": 0.0664,
      "eval_rougeL_fmeasure": 0.145,
      "eval_rougeL_precision": 0.1366,
      "eval_rougeL_recall": 0.1631,
      "eval_rougeLsum_fmeasure": 0.3161,
      "eval_rougeLsum_precision": 0.2997,
      "eval_rougeLsum_recall": 0.3496,
      "eval_runtime": 3350.2964,
      "eval_samples_per_second": 0.038,
      "eval_steps_per_second": 0.038,
      "step": 255
    },
    {
      "epoch": 1.78,
      "learning_rate": 4.9804687500000004e-05,
      "loss": 3.3793,
      "step": 510
    },
    {
      "epoch": 1.78,
      "eval_average_rogue": 0.2109,
      "eval_loss": 3.268017292022705,
      "eval_rouge1_fmeasure": 0.3285,
      "eval_rouge1_precision": 0.3068,
      "eval_rouge1_recall": 0.3699,
      "eval_rouge2_fmeasure": 0.0571,
      "eval_rouge2_precision": 0.053,
      "eval_rouge2_recall": 0.065,
      "eval_rougeL_fmeasure": 0.1417,
      "eval_rougeL_precision": 0.1315,
      "eval_rougeL_recall": 0.1631,
      "eval_rougeLsum_fmeasure": 0.3163,
      "eval_rougeLsum_precision": 0.2954,
      "eval_rougeLsum_recall": 0.356,
      "eval_runtime": 3338.0119,
      "eval_samples_per_second": 0.038,
      "eval_steps_per_second": 0.038,
      "step": 510
    },
    {
      "epoch": 2.67,
      "learning_rate": 4.758033664881408e-05,
      "loss": 3.1826,
      "step": 765
    },
    {
      "epoch": 2.67,
      "eval_average_rogue": 0.214,
      "eval_loss": 3.245297431945801,
      "eval_rouge1_fmeasure": 0.3293,
      "eval_rouge1_precision": 0.3099,
      "eval_rouge1_recall": 0.3668,
      "eval_rouge2_fmeasure": 0.0644,
      "eval_rouge2_precision": 0.0605,
      "eval_rouge2_recall": 0.0717,
      "eval_rougeL_fmeasure": 0.145,
      "eval_rougeL_precision": 0.1356,
      "eval_rougeL_recall": 0.1647,
      "eval_rougeLsum_fmeasure": 0.3173,
      "eval_rougeLsum_precision": 0.2986,
      "eval_rougeLsum_recall": 0.3531,
      "eval_runtime": 3330.8537,
      "eval_samples_per_second": 0.038,
      "eval_steps_per_second": 0.038,
      "step": 765
    },
    {
      "epoch": 3.55,
      "learning_rate": 4.5141545524101e-05,
      "loss": 3.0133,
      "step": 1020
    },
    {
      "epoch": 3.55,
      "eval_average_rogue": 0.1948,
      "eval_loss": 3.2580819129943848,
      "eval_rouge1_fmeasure": 0.2983,
      "eval_rouge1_precision": 0.2893,
      "eval_rouge1_recall": 0.3239,
      "eval_rouge2_fmeasure": 0.0545,
      "eval_rouge2_precision": 0.0527,
      "eval_rouge2_recall": 0.0591,
      "eval_rougeL_fmeasure": 0.1397,
      "eval_rougeL_precision": 0.1356,
      "eval_rougeL_recall": 0.1534,
      "eval_rougeLsum_fmeasure": 0.2865,
      "eval_rougeLsum_precision": 0.2776,
      "eval_rougeLsum_recall": 0.3105,
      "eval_runtime": 3289.5263,
      "eval_samples_per_second": 0.039,
      "eval_steps_per_second": 0.039,
      "step": 1020
    },
    {
      "epoch": 4.44,
      "learning_rate": 4.2702754399387915e-05,
      "loss": 2.8569,
      "step": 1275
    },
    {
      "epoch": 4.44,
      "eval_average_rogue": 0.2256,
      "eval_loss": 3.2716007232666016,
      "eval_rouge1_fmeasure": 0.3491,
      "eval_rouge1_precision": 0.3326,
      "eval_rouge1_recall": 0.3835,
      "eval_rouge2_fmeasure": 0.0671,
      "eval_rouge2_precision": 0.0641,
      "eval_rouge2_recall": 0.0732,
      "eval_rougeL_fmeasure": 0.1516,
      "eval_rougeL_precision": 0.1436,
      "eval_rougeL_recall": 0.1701,
      "eval_rougeLsum_fmeasure": 0.3346,
      "eval_rougeLsum_precision": 0.3189,
      "eval_rougeLsum_recall": 0.368,
      "eval_runtime": 3297.3203,
      "eval_samples_per_second": 0.039,
      "eval_steps_per_second": 0.039,
      "step": 1275
    },
    {
      "epoch": 5.33,
      "learning_rate": 4.026396327467483e-05,
      "loss": 2.7008,
      "step": 1530
    },
    {
      "epoch": 5.33,
      "eval_average_rogue": 0.2285,
      "eval_loss": 3.313244104385376,
      "eval_rouge1_fmeasure": 0.3531,
      "eval_rouge1_precision": 0.3326,
      "eval_rouge1_recall": 0.3925,
      "eval_rouge2_fmeasure": 0.0693,
      "eval_rouge2_precision": 0.0653,
      "eval_rouge2_recall": 0.0772,
      "eval_rougeL_fmeasure": 0.1521,
      "eval_rougeL_precision": 0.1427,
      "eval_rougeL_recall": 0.1725,
      "eval_rougeLsum_fmeasure": 0.3396,
      "eval_rougeLsum_precision": 0.3202,
      "eval_rougeLsum_recall": 0.3776,
      "eval_runtime": 3297.2909,
      "eval_samples_per_second": 0.039,
      "eval_steps_per_second": 0.039,
      "step": 1530
    },
    {
      "epoch": 6.22,
      "learning_rate": 3.7825172149961744e-05,
      "loss": 2.5657,
      "step": 1785
    },
    {
      "epoch": 6.22,
      "eval_average_rogue": 0.2241,
      "eval_loss": 3.349586009979248,
      "eval_rouge1_fmeasure": 0.3473,
      "eval_rouge1_precision": 0.3291,
      "eval_rouge1_recall": 0.385,
      "eval_rouge2_fmeasure": 0.0658,
      "eval_rouge2_precision": 0.0622,
      "eval_rouge2_recall": 0.0731,
      "eval_rougeL_fmeasure": 0.1504,
      "eval_rougeL_precision": 0.1416,
      "eval_rougeL_recall": 0.17,
      "eval_rougeLsum_fmeasure": 0.3326,
      "eval_rougeLsum_precision": 0.3154,
      "eval_rougeLsum_recall": 0.3683,
      "eval_runtime": 3292.4741,
      "eval_samples_per_second": 0.039,
      "eval_steps_per_second": 0.039,
      "step": 1785
    },
    {
      "epoch": 7.11,
      "learning_rate": 3.538638102524866e-05,
      "loss": 2.4327,
      "step": 2040
    },
    {
      "epoch": 7.11,
      "eval_average_rogue": 0.2298,
      "eval_loss": 3.437542200088501,
      "eval_rouge1_fmeasure": 0.3551,
      "eval_rouge1_precision": 0.3347,
      "eval_rouge1_recall": 0.3946,
      "eval_rouge2_fmeasure": 0.0703,
      "eval_rouge2_precision": 0.0664,
      "eval_rouge2_recall": 0.0778,
      "eval_rougeL_fmeasure": 0.1527,
      "eval_rougeL_precision": 0.1433,
      "eval_rougeL_recall": 0.1732,
      "eval_rougeLsum_fmeasure": 0.341,
      "eval_rougeLsum_precision": 0.3218,
      "eval_rougeLsum_recall": 0.3788,
      "eval_runtime": 3284.3798,
      "eval_samples_per_second": 0.039,
      "eval_steps_per_second": 0.039,
      "step": 2040
    },
    {
      "epoch": 8.0,
      "learning_rate": 3.294758990053558e-05,
      "loss": 2.3352,
      "step": 2295
    },
    {
      "epoch": 8.0,
      "eval_average_rogue": 0.2284,
      "eval_loss": 3.4355413913726807,
      "eval_rouge1_fmeasure": 0.3551,
      "eval_rouge1_precision": 0.3334,
      "eval_rouge1_recall": 0.397,
      "eval_rouge2_fmeasure": 0.0682,
      "eval_rouge2_precision": 0.064,
      "eval_rouge2_recall": 0.0762,
      "eval_rougeL_fmeasure": 0.1507,
      "eval_rougeL_precision": 0.1406,
      "eval_rougeL_recall": 0.1721,
      "eval_rougeLsum_fmeasure": 0.3396,
      "eval_rougeLsum_precision": 0.3189,
      "eval_rougeLsum_recall": 0.3797,
      "eval_runtime": 3319.5759,
      "eval_samples_per_second": 0.039,
      "eval_steps_per_second": 0.039,
      "step": 2295
    },
    {
      "epoch": 8.89,
      "learning_rate": 3.0508798775822494e-05,
      "loss": 2.2068,
      "step": 2550
    },
    {
      "epoch": 8.89,
      "eval_average_rogue": 0.2319,
      "eval_loss": 3.517580032348633,
      "eval_rouge1_fmeasure": 0.3593,
      "eval_rouge1_precision": 0.3378,
      "eval_rouge1_recall": 0.4005,
      "eval_rouge2_fmeasure": 0.0711,
      "eval_rouge2_precision": 0.0669,
      "eval_rouge2_recall": 0.0793,
      "eval_rougeL_fmeasure": 0.153,
      "eval_rougeL_precision": 0.1431,
      "eval_rougeL_recall": 0.1738,
      "eval_rougeLsum_fmeasure": 0.3441,
      "eval_rougeLsum_precision": 0.3239,
      "eval_rougeLsum_recall": 0.383,
      "eval_runtime": 3268.2597,
      "eval_samples_per_second": 0.039,
      "eval_steps_per_second": 0.039,
      "step": 2550
    },
    {
      "epoch": 9.77,
      "learning_rate": 2.8070007651109415e-05,
      "loss": 2.1177,
      "step": 2805
    },
    {
      "epoch": 9.77,
      "eval_average_rogue": 0.2322,
      "eval_loss": 3.5942444801330566,
      "eval_rouge1_fmeasure": 0.3615,
      "eval_rouge1_precision": 0.3407,
      "eval_rouge1_recall": 0.402,
      "eval_rouge2_fmeasure": 0.0691,
      "eval_rouge2_precision": 0.0652,
      "eval_rouge2_recall": 0.0771,
      "eval_rougeL_fmeasure": 0.1516,
      "eval_rougeL_precision": 0.1422,
      "eval_rougeL_recall": 0.1722,
      "eval_rougeLsum_fmeasure": 0.3465,
      "eval_rougeLsum_precision": 0.3267,
      "eval_rougeLsum_recall": 0.3853,
      "eval_runtime": 3329.9183,
      "eval_samples_per_second": 0.038,
      "eval_steps_per_second": 0.038,
      "step": 2805
    },
    {
      "epoch": 10.66,
      "learning_rate": 2.563121652639633e-05,
      "loss": 2.0452,
      "step": 3060
    },
    {
      "epoch": 10.66,
      "eval_average_rogue": 0.2329,
      "eval_loss": 3.618927001953125,
      "eval_rouge1_fmeasure": 0.3627,
      "eval_rouge1_precision": 0.3451,
      "eval_rouge1_recall": 0.3992,
      "eval_rouge2_fmeasure": 0.0688,
      "eval_rouge2_precision": 0.0655,
      "eval_rouge2_recall": 0.0756,
      "eval_rougeL_fmeasure": 0.152,
      "eval_rougeL_precision": 0.1437,
      "eval_rougeL_recall": 0.1712,
      "eval_rougeLsum_fmeasure": 0.3482,
      "eval_rougeLsum_precision": 0.3317,
      "eval_rougeLsum_recall": 0.383,
      "eval_runtime": 3288.5395,
      "eval_samples_per_second": 0.039,
      "eval_steps_per_second": 0.039,
      "step": 3060
    },
    {
      "epoch": 11.55,
      "learning_rate": 2.3192425401683247e-05,
      "loss": 1.9276,
      "step": 3315
    },
    {
      "epoch": 11.55,
      "eval_average_rogue": 0.235,
      "eval_loss": 3.685786247253418,
      "eval_rouge1_fmeasure": 0.3662,
      "eval_rouge1_precision": 0.3468,
      "eval_rouge1_recall": 0.4051,
      "eval_rouge2_fmeasure": 0.0706,
      "eval_rouge2_precision": 0.0668,
      "eval_rouge2_recall": 0.0788,
      "eval_rougeL_fmeasure": 0.1525,
      "eval_rougeL_precision": 0.1437,
      "eval_rougeL_recall": 0.1723,
      "eval_rougeLsum_fmeasure": 0.3508,
      "eval_rougeLsum_precision": 0.3328,
      "eval_rougeLsum_recall": 0.3883,
      "eval_runtime": 3283.2817,
      "eval_samples_per_second": 0.039,
      "eval_steps_per_second": 0.039,
      "step": 3315
    },
    {
      "epoch": 12.44,
      "learning_rate": 2.0753634276970162e-05,
      "loss": 1.9006,
      "step": 3570
    },
    {
      "epoch": 12.44,
      "eval_average_rogue": 0.2331,
      "eval_loss": 3.7175817489624023,
      "eval_rouge1_fmeasure": 0.3627,
      "eval_rouge1_precision": 0.3429,
      "eval_rouge1_recall": 0.4025,
      "eval_rouge2_fmeasure": 0.0698,
      "eval_rouge2_precision": 0.0659,
      "eval_rouge2_recall": 0.0783,
      "eval_rougeL_fmeasure": 0.152,
      "eval_rougeL_precision": 0.1429,
      "eval_rougeL_recall": 0.1722,
      "eval_rougeLsum_fmeasure": 0.3477,
      "eval_rougeLsum_precision": 0.329,
      "eval_rougeLsum_recall": 0.386,
      "eval_runtime": 3384.322,
      "eval_samples_per_second": 0.038,
      "eval_steps_per_second": 0.038,
      "step": 3570
    },
    {
      "epoch": 13.33,
      "learning_rate": 1.8314843152257076e-05,
      "loss": 1.8247,
      "step": 3825
    },
    {
      "epoch": 13.33,
      "eval_average_rogue": 0.2358,
      "eval_loss": 3.7242326736450195,
      "eval_rouge1_fmeasure": 0.3679,
      "eval_rouge1_precision": 0.3481,
      "eval_rouge1_recall": 0.4077,
      "eval_rouge2_fmeasure": 0.0698,
      "eval_rouge2_precision": 0.066,
      "eval_rouge2_recall": 0.0773,
      "eval_rougeL_fmeasure": 0.1537,
      "eval_rougeL_precision": 0.1444,
      "eval_rougeL_recall": 0.1741,
      "eval_rougeLsum_fmeasure": 0.3517,
      "eval_rougeLsum_precision": 0.3323,
      "eval_rougeLsum_recall": 0.3894,
      "eval_runtime": 3438.3144,
      "eval_samples_per_second": 0.037,
      "eval_steps_per_second": 0.037,
      "step": 3825
    },
    {
      "epoch": 14.22,
      "learning_rate": 1.5876052027543994e-05,
      "loss": 1.7352,
      "step": 4080
    },
    {
      "epoch": 14.22,
      "eval_average_rogue": 0.2336,
      "eval_loss": 3.790210723876953,
      "eval_rouge1_fmeasure": 0.3648,
      "eval_rouge1_precision": 0.3457,
      "eval_rouge1_recall": 0.4025,
      "eval_rouge2_fmeasure": 0.0702,
      "eval_rouge2_precision": 0.0667,
      "eval_rouge2_recall": 0.0776,
      "eval_rougeL_fmeasure": 0.1508,
      "eval_rougeL_precision": 0.1422,
      "eval_rougeL_recall": 0.1702,
      "eval_rougeLsum_fmeasure": 0.3486,
      "eval_rougeLsum_precision": 0.3307,
      "eval_rougeLsum_recall": 0.3849,
      "eval_runtime": 3429.0398,
      "eval_samples_per_second": 0.037,
      "eval_steps_per_second": 0.037,
      "step": 4080
    },
    {
      "epoch": 15.1,
      "learning_rate": 1.3437260902830912e-05,
      "loss": 1.7091,
      "step": 4335
    },
    {
      "epoch": 15.1,
      "eval_average_rogue": 0.2351,
      "eval_loss": 3.8391542434692383,
      "eval_rouge1_fmeasure": 0.3664,
      "eval_rouge1_precision": 0.346,
      "eval_rouge1_recall": 0.4069,
      "eval_rouge2_fmeasure": 0.0706,
      "eval_rouge2_precision": 0.0666,
      "eval_rouge2_recall": 0.0786,
      "eval_rougeL_fmeasure": 0.1527,
      "eval_rougeL_precision": 0.1435,
      "eval_rougeL_recall": 0.1728,
      "eval_rougeLsum_fmeasure": 0.3509,
      "eval_rougeLsum_precision": 0.331,
      "eval_rougeLsum_recall": 0.3892,
      "eval_runtime": 3445.0892,
      "eval_samples_per_second": 0.037,
      "eval_steps_per_second": 0.037,
      "step": 4335
    },
    {
      "epoch": 15.99,
      "learning_rate": 1.0998469778117827e-05,
      "loss": 1.654,
      "step": 4590
    },
    {
      "epoch": 15.99,
      "eval_average_rogue": 0.2347,
      "eval_loss": 3.8251237869262695,
      "eval_rouge1_fmeasure": 0.3674,
      "eval_rouge1_precision": 0.3475,
      "eval_rouge1_recall": 0.4065,
      "eval_rouge2_fmeasure": 0.0692,
      "eval_rouge2_precision": 0.0655,
      "eval_rouge2_recall": 0.0767,
      "eval_rougeL_fmeasure": 0.1515,
      "eval_rougeL_precision": 0.1425,
      "eval_rougeL_recall": 0.1713,
      "eval_rougeLsum_fmeasure": 0.3508,
      "eval_rougeLsum_precision": 0.3318,
      "eval_rougeLsum_recall": 0.3883,
      "eval_runtime": 3431.7145,
      "eval_samples_per_second": 0.037,
      "eval_steps_per_second": 0.037,
      "step": 4590
    },
    {
      "epoch": 16.88,
      "learning_rate": 8.559678653404744e-06,
      "loss": 1.6034,
      "step": 4845
    },
    {
      "epoch": 16.88,
      "eval_average_rogue": 0.2342,
      "eval_loss": 3.8599014282226562,
      "eval_rouge1_fmeasure": 0.3653,
      "eval_rouge1_precision": 0.3449,
      "eval_rouge1_recall": 0.4056,
      "eval_rouge2_fmeasure": 0.0694,
      "eval_rouge2_precision": 0.0655,
      "eval_rouge2_recall": 0.0771,
      "eval_rougeL_fmeasure": 0.1531,
      "eval_rougeL_precision": 0.1438,
      "eval_rougeL_recall": 0.1738,
      "eval_rougeLsum_fmeasure": 0.3491,
      "eval_rougeLsum_precision": 0.3295,
      "eval_rougeLsum_recall": 0.3877,
      "eval_runtime": 3434.4477,
      "eval_samples_per_second": 0.037,
      "eval_steps_per_second": 0.037,
      "step": 4845
    },
    {
      "epoch": 17.77,
      "learning_rate": 6.120887528691661e-06,
      "loss": 1.5801,
      "step": 5100
    },
    {
      "epoch": 17.77,
      "eval_average_rogue": 0.2336,
      "eval_loss": 3.8798491954803467,
      "eval_rouge1_fmeasure": 0.3647,
      "eval_rouge1_precision": 0.3432,
      "eval_rouge1_recall": 0.4057,
      "eval_rouge2_fmeasure": 0.0682,
      "eval_rouge2_precision": 0.0643,
      "eval_rouge2_recall": 0.0758,
      "eval_rougeL_fmeasure": 0.1523,
      "eval_rougeL_precision": 0.1426,
      "eval_rougeL_recall": 0.1732,
      "eval_rougeLsum_fmeasure": 0.3491,
      "eval_rougeLsum_precision": 0.329,
      "eval_rougeLsum_recall": 0.3882,
      "eval_runtime": 3446.3776,
      "eval_samples_per_second": 0.037,
      "eval_steps_per_second": 0.037,
      "step": 5100
    }
  ],
  "max_steps": 5740,
  "num_train_epochs": 20,
  "total_flos": 1.37710446575616e+16,
  "trial_name": null,
  "trial_params": null
}