TusharJoshi89 committed on
Commit
b3f0dfe
1 Parent(s): 2dd04c4

fix: Uploading summarizer

Browse files
Files changed (7) hide show
  1. config.json +1 -1
  2. optimizer.pt +2 -2
  3. pytorch_model.bin +1 -1
  4. rng_state.pth +1 -1
  5. scheduler.pt +1 -1
  6. trainer_state.json +286 -350
  7. training_args.bin +1 -1
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "t5-small",
3
  "architectures": [
4
  "T5ForConditionalGeneration"
5
  ],
 
1
  {
2
+ "_name_or_path": "TusharJoshi89/medical-research-title-generator",
3
  "architectures": [
4
  "T5ForConditionalGeneration"
5
  ],
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f84b3cc9fb8e9717544736645adf6cd4b86a75e35e579416077c9d1743ce9e41
3
- size 484130565
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8138409839f3271d64fddaf61d9388d19eff4847f49244d6598d22c1be08f7f4
3
+ size 484130629
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:aa51035225c92add97277af43f4abfa1b8e0fff2b471b95315e9d09dba25e46e
3
  size 242071641
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:45d9f210a59994931e6bebed8d928bb7b91b9d0221c0ae617e0e3d7da0795431
3
  size 242071641
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8ff6959ed8d1291fd55400668e43aee7512eb604c51c6ffc6749f160bd1ff3d1
3
  size 14575
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:601c80416e72a4a9ad433190bad47bcabb8046f27bc84cb9117568b6f3fb9f9e
3
  size 14575
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:daaedac35250ef1ced3437442c43e80298f6c3c1a8e743335e3ed835bcb73680
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2b3b2a129639e4a3c47b487288feb3a58ca2794ba7ae74d20499bbe0637078b4
3
  size 627
trainer_state.json CHANGED
@@ -1,467 +1,403 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 19.653179190751445,
5
- "global_step": 17000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
- "epoch": 0.58,
12
- "learning_rate": 1.15606936416185e-05,
13
- "loss": 4.1602,
14
  "step": 500
15
  },
16
  {
17
- "epoch": 1.0,
18
- "eval_gen_len": 15.5003,
19
- "eval_loss": 2.8024423122406006,
20
- "eval_rouge1": 0.2552,
21
- "eval_rouge2": 0.1019,
22
- "eval_rougeL": 0.2331,
23
- "eval_rougeLsum": 0.2332,
24
- "eval_runtime": 184.148,
25
- "eval_samples_per_second": 18.778,
26
- "eval_steps_per_second": 1.178,
27
- "step": 865
28
- },
29
- {
30
- "epoch": 1.16,
31
- "learning_rate": 1.9835716458777003e-05,
32
- "loss": 3.1173,
33
  "step": 1000
34
  },
35
  {
36
- "epoch": 1.73,
37
- "learning_rate": 1.922725889869182e-05,
38
- "loss": 2.9624,
39
  "step": 1500
40
  },
41
  {
42
- "epoch": 2.0,
43
- "eval_gen_len": 15.8271,
44
- "eval_loss": 2.6451988220214844,
45
- "eval_rouge1": 0.2882,
46
- "eval_rouge2": 0.1183,
47
- "eval_rougeL": 0.2626,
48
- "eval_rougeLsum": 0.2626,
49
- "eval_runtime": 182.4271,
50
- "eval_samples_per_second": 18.956,
51
- "eval_steps_per_second": 1.19,
52
- "step": 1730
53
- },
54
- {
55
- "epoch": 2.31,
56
- "learning_rate": 1.8618801338606633e-05,
57
- "loss": 2.8766,
58
  "step": 2000
59
  },
60
  {
61
- "epoch": 2.89,
62
- "learning_rate": 1.801034377852145e-05,
63
- "loss": 2.8206,
64
  "step": 2500
65
  },
66
  {
67
- "epoch": 3.0,
68
- "eval_gen_len": 16.2183,
69
- "eval_loss": 2.5787582397460938,
70
- "eval_rouge1": 0.2952,
71
- "eval_rouge2": 0.1218,
72
- "eval_rougeL": 0.2684,
73
- "eval_rougeLsum": 0.2685,
74
- "eval_runtime": 182.2993,
75
- "eval_samples_per_second": 18.969,
76
- "eval_steps_per_second": 1.19,
77
- "step": 2595
78
- },
79
- {
80
- "epoch": 3.47,
81
- "learning_rate": 1.7401886218436267e-05,
82
- "loss": 2.7764,
83
  "step": 3000
84
  },
85
  {
86
- "epoch": 4.0,
87
- "eval_gen_len": 16.3418,
88
- "eval_loss": 2.5332629680633545,
89
- "eval_rouge1": 0.3029,
90
- "eval_rouge2": 0.1258,
91
- "eval_rougeL": 0.2755,
92
- "eval_rougeLsum": 0.2754,
93
- "eval_runtime": 181.4496,
94
- "eval_samples_per_second": 19.058,
95
- "eval_steps_per_second": 1.196,
96
- "step": 3460
97
- },
98
- {
99
- "epoch": 4.05,
100
- "learning_rate": 1.679342865835108e-05,
101
- "loss": 2.7414,
102
  "step": 3500
103
  },
104
  {
105
- "epoch": 4.62,
106
- "learning_rate": 1.6184971098265897e-05,
107
- "loss": 2.6997,
108
  "step": 4000
109
  },
110
  {
111
- "epoch": 5.0,
112
- "eval_gen_len": 16.4101,
113
- "eval_loss": 2.504000186920166,
114
- "eval_rouge1": 0.3046,
115
- "eval_rouge2": 0.1273,
116
- "eval_rougeL": 0.2773,
117
- "eval_rougeLsum": 0.2771,
118
- "eval_runtime": 181.6751,
119
- "eval_samples_per_second": 19.034,
120
- "eval_steps_per_second": 1.194,
121
- "step": 4325
122
- },
123
- {
124
- "epoch": 5.2,
125
- "learning_rate": 1.5576513538180714e-05,
126
- "loss": 2.6795,
127
  "step": 4500
128
  },
129
  {
130
- "epoch": 5.78,
131
- "learning_rate": 1.4968055978095528e-05,
132
- "loss": 2.6647,
133
  "step": 5000
134
  },
135
  {
136
- "epoch": 6.0,
137
- "eval_gen_len": 16.354,
138
- "eval_loss": 2.4734175205230713,
139
- "eval_rouge1": 0.3069,
140
- "eval_rouge2": 0.1298,
141
- "eval_rougeL": 0.2802,
142
- "eval_rougeLsum": 0.2801,
143
- "eval_runtime": 181.0262,
144
- "eval_samples_per_second": 19.102,
145
- "eval_steps_per_second": 1.199,
146
- "step": 5190
147
- },
148
- {
149
- "epoch": 6.36,
150
- "learning_rate": 1.4359598418010346e-05,
151
- "loss": 2.6287,
152
  "step": 5500
153
  },
154
  {
155
- "epoch": 6.94,
156
- "learning_rate": 1.375114085792516e-05,
157
- "loss": 2.6196,
158
  "step": 6000
159
  },
160
  {
161
- "epoch": 7.0,
162
- "eval_gen_len": 16.526,
163
- "eval_loss": 2.4547228813171387,
164
- "eval_rouge1": 0.3077,
165
- "eval_rouge2": 0.131,
166
- "eval_rougeL": 0.2804,
167
- "eval_rougeLsum": 0.2804,
168
- "eval_runtime": 182.5951,
169
- "eval_samples_per_second": 18.938,
170
- "eval_steps_per_second": 1.188,
171
- "step": 6055
172
  },
173
  {
174
- "epoch": 7.51,
175
- "learning_rate": 1.3142683297839978e-05,
176
- "loss": 2.5916,
177
- "step": 6500
178
  },
179
  {
180
- "epoch": 8.0,
181
- "eval_gen_len": 16.4419,
182
- "eval_loss": 2.435168504714966,
183
- "eval_rouge1": 0.3107,
184
- "eval_rouge2": 0.1332,
185
- "eval_rougeL": 0.2833,
186
- "eval_rougeLsum": 0.2834,
187
- "eval_runtime": 181.1692,
188
- "eval_samples_per_second": 19.087,
189
- "eval_steps_per_second": 1.198,
190
- "step": 6920
 
 
 
 
 
 
191
  },
192
  {
193
- "epoch": 8.09,
194
- "learning_rate": 1.2534225737754793e-05,
195
- "loss": 2.5986,
196
- "step": 7000
197
  },
198
  {
199
- "epoch": 8.67,
200
- "learning_rate": 1.1925768177669607e-05,
201
- "loss": 2.5716,
202
- "step": 7500
203
  },
204
  {
205
- "epoch": 9.0,
206
- "eval_gen_len": 16.4023,
207
- "eval_loss": 2.4196717739105225,
208
- "eval_rouge1": 0.3092,
209
- "eval_rouge2": 0.132,
210
- "eval_rougeL": 0.2819,
211
- "eval_rougeLsum": 0.2817,
212
- "eval_runtime": 181.3331,
213
- "eval_samples_per_second": 19.07,
214
- "eval_steps_per_second": 1.197,
215
- "step": 7785
216
  },
217
  {
218
- "epoch": 9.25,
219
- "learning_rate": 1.1317310617584426e-05,
220
- "loss": 2.5347,
221
- "step": 8000
222
  },
223
  {
224
- "epoch": 9.83,
225
- "learning_rate": 1.070885305749924e-05,
226
- "loss": 2.548,
227
- "step": 8500
228
  },
229
  {
230
- "epoch": 10.0,
231
- "eval_gen_len": 16.3256,
232
- "eval_loss": 2.4123945236206055,
233
- "eval_rouge1": 0.3094,
234
- "eval_rouge2": 0.1329,
235
- "eval_rougeL": 0.2822,
236
- "eval_rougeLsum": 0.2823,
237
- "eval_runtime": 181.1565,
238
- "eval_samples_per_second": 19.088,
239
- "eval_steps_per_second": 1.198,
240
- "step": 8650
241
  },
242
  {
243
- "epoch": 10.4,
244
- "learning_rate": 1.0100395497414058e-05,
245
- "loss": 2.5237,
246
- "step": 9000
247
  },
248
  {
249
- "epoch": 10.98,
250
- "learning_rate": 9.491937937328873e-06,
251
- "loss": 2.5352,
252
- "step": 9500
253
  },
254
  {
255
- "epoch": 11.0,
256
- "eval_gen_len": 16.3774,
257
- "eval_loss": 2.402285575866699,
258
- "eval_rouge1": 0.3096,
259
- "eval_rouge2": 0.1323,
260
- "eval_rougeL": 0.2821,
261
- "eval_rougeLsum": 0.282,
262
- "eval_runtime": 181.4776,
263
- "eval_samples_per_second": 19.055,
264
- "eval_steps_per_second": 1.196,
265
- "step": 9515
266
  },
267
  {
268
- "epoch": 11.56,
269
- "learning_rate": 8.883480377243688e-06,
270
- "loss": 2.5087,
271
- "step": 10000
272
  },
273
  {
274
- "epoch": 12.0,
275
- "eval_gen_len": 16.4592,
276
- "eval_loss": 2.39676570892334,
277
- "eval_rouge1": 0.3107,
278
- "eval_rouge2": 0.1326,
279
- "eval_rougeL": 0.2831,
280
- "eval_rougeLsum": 0.2832,
281
- "eval_runtime": 182.2228,
282
- "eval_samples_per_second": 18.977,
283
- "eval_steps_per_second": 1.191,
284
- "step": 10380
285
  },
286
  {
287
- "epoch": 12.14,
288
- "learning_rate": 8.275022817158503e-06,
289
- "loss": 2.514,
290
- "step": 10500
291
  },
292
  {
293
- "epoch": 12.72,
294
- "learning_rate": 7.66656525707332e-06,
295
- "loss": 2.4865,
296
- "step": 11000
297
  },
298
  {
299
- "epoch": 13.0,
300
- "eval_gen_len": 16.4586,
301
- "eval_loss": 2.38985013961792,
302
- "eval_rouge1": 0.3117,
303
- "eval_rouge2": 0.1339,
304
- "eval_rougeL": 0.2841,
305
- "eval_rougeLsum": 0.2842,
306
- "eval_runtime": 181.3381,
307
- "eval_samples_per_second": 19.069,
308
- "eval_steps_per_second": 1.197,
309
- "step": 11245
310
  },
311
  {
312
- "epoch": 13.29,
313
- "learning_rate": 7.058107696988135e-06,
314
- "loss": 2.4861,
315
- "step": 11500
 
 
 
 
 
 
 
 
 
 
 
 
 
316
  },
317
  {
318
- "epoch": 13.87,
319
- "learning_rate": 6.449650136902952e-06,
320
- "loss": 2.5081,
321
- "step": 12000
322
  },
323
  {
324
- "epoch": 14.0,
325
- "eval_gen_len": 16.4072,
326
- "eval_loss": 2.3885083198547363,
327
- "eval_rouge1": 0.3124,
328
- "eval_rouge2": 0.1337,
329
- "eval_rougeL": 0.2845,
330
- "eval_rougeLsum": 0.2845,
331
- "eval_runtime": 181.2166,
332
- "eval_samples_per_second": 19.082,
333
- "eval_steps_per_second": 1.197,
334
- "step": 12110
335
  },
336
  {
337
- "epoch": 14.45,
338
- "learning_rate": 5.841192576817768e-06,
339
- "loss": 2.4748,
340
- "step": 12500
341
  },
342
  {
343
- "epoch": 15.0,
344
- "eval_gen_len": 16.4572,
345
- "eval_loss": 2.3791513442993164,
346
- "eval_rouge1": 0.3131,
347
- "eval_rouge2": 0.134,
348
- "eval_rougeL": 0.285,
349
- "eval_rougeLsum": 0.2849,
350
- "eval_runtime": 181.192,
351
- "eval_samples_per_second": 19.085,
352
- "eval_steps_per_second": 1.198,
353
- "step": 12975
354
  },
355
  {
356
- "epoch": 15.03,
357
- "learning_rate": 5.232735016732583e-06,
358
- "loss": 2.4641,
359
- "step": 13000
360
  },
361
  {
362
- "epoch": 15.61,
363
- "learning_rate": 4.6242774566473994e-06,
364
- "loss": 2.469,
365
- "step": 13500
366
  },
367
  {
368
- "epoch": 16.0,
369
- "eval_gen_len": 16.3907,
370
- "eval_loss": 2.3784728050231934,
371
- "eval_rouge1": 0.3119,
372
- "eval_rouge2": 0.1333,
373
- "eval_rougeL": 0.2841,
374
- "eval_rougeLsum": 0.284,
375
- "eval_runtime": 180.9918,
376
- "eval_samples_per_second": 19.106,
377
- "eval_steps_per_second": 1.199,
378
- "step": 13840
379
  },
380
  {
381
- "epoch": 16.18,
382
- "learning_rate": 4.0158198965622155e-06,
383
- "loss": 2.4771,
384
- "step": 14000
385
  },
386
  {
387
- "epoch": 16.76,
388
- "learning_rate": 3.4073623364770307e-06,
389
- "loss": 2.4587,
390
- "step": 14500
391
  },
392
  {
393
- "epoch": 17.0,
394
- "eval_gen_len": 16.4488,
395
- "eval_loss": 2.3765616416931152,
396
- "eval_rouge1": 0.3115,
397
- "eval_rouge2": 0.1334,
398
- "eval_rougeL": 0.2835,
399
- "eval_rougeLsum": 0.2834,
400
- "eval_runtime": 181.9602,
401
- "eval_samples_per_second": 19.004,
402
- "eval_steps_per_second": 1.193,
403
- "step": 14705
404
  },
405
  {
406
- "epoch": 17.34,
407
- "learning_rate": 2.798904776391847e-06,
408
- "loss": 2.4679,
409
- "step": 15000
410
  },
411
  {
412
- "epoch": 17.92,
413
- "learning_rate": 2.1904472163066628e-06,
414
- "loss": 2.4428,
415
- "step": 15500
416
  },
417
  {
418
- "epoch": 18.0,
419
- "eval_gen_len": 16.4141,
420
- "eval_loss": 2.375408411026001,
421
- "eval_rouge1": 0.3137,
422
- "eval_rouge2": 0.1346,
423
- "eval_rougeL": 0.2854,
424
- "eval_rougeLsum": 0.2854,
425
- "eval_runtime": 181.1095,
426
- "eval_samples_per_second": 19.093,
427
- "eval_steps_per_second": 1.198,
428
- "step": 15570
429
  },
430
  {
431
- "epoch": 18.5,
432
- "learning_rate": 1.5819896562214788e-06,
433
- "loss": 2.4548,
434
- "step": 16000
 
 
 
 
 
 
 
 
 
 
 
 
 
435
  },
436
  {
437
- "epoch": 19.0,
438
- "eval_gen_len": 16.4451,
439
- "eval_loss": 2.3725531101226807,
440
- "eval_rouge1": 0.3129,
441
- "eval_rouge2": 0.1341,
442
- "eval_rougeL": 0.2849,
443
- "eval_rougeLsum": 0.285,
444
- "eval_runtime": 180.98,
445
- "eval_samples_per_second": 19.107,
446
- "eval_steps_per_second": 1.199,
447
- "step": 16435
448
  },
449
  {
450
- "epoch": 19.08,
451
- "learning_rate": 9.735320961362946e-07,
452
- "loss": 2.4426,
453
- "step": 16500
454
  },
455
  {
456
- "epoch": 19.65,
457
- "learning_rate": 3.650745360511105e-07,
458
- "loss": 2.4389,
459
- "step": 17000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
460
  }
461
  ],
462
- "max_steps": 17300,
463
- "num_train_epochs": 20,
464
- "total_flos": 6.944051276532941e+16,
465
  "trial_name": null,
466
  "trial_params": null
467
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 3.9332700393327005,
5
+ "global_step": 29000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
+ "epoch": 0.07,
12
+ "learning_rate": 8.440677966101696e-05,
13
+ "loss": 2.551,
14
  "step": 500
15
  },
16
  {
17
+ "epoch": 0.14,
18
+ "learning_rate": 0.00016915254237288136,
19
+ "loss": 2.541,
 
 
 
 
 
 
 
 
 
 
 
 
 
20
  "step": 1000
21
  },
22
  {
23
+ "epoch": 0.2,
24
+ "learning_rate": 0.0002538983050847458,
25
+ "loss": 2.5194,
26
  "step": 1500
27
  },
28
  {
29
+ "epoch": 0.27,
30
+ "learning_rate": 0.00033864406779661016,
31
+ "loss": 2.4769,
 
 
 
 
 
 
 
 
 
 
 
 
 
32
  "step": 2000
33
  },
34
  {
35
+ "epoch": 0.34,
36
+ "learning_rate": 0.0004233898305084746,
37
+ "loss": 2.46,
38
  "step": 2500
39
  },
40
  {
41
+ "epoch": 0.41,
42
+ "learning_rate": 0.0004990957727375481,
43
+ "loss": 2.403,
 
 
 
 
 
 
 
 
 
 
 
 
 
44
  "step": 3000
45
  },
46
  {
47
+ "epoch": 0.47,
48
+ "learning_rate": 0.0004897144148896089,
49
+ "loss": 2.3829,
 
 
 
 
 
 
 
 
 
 
 
 
 
50
  "step": 3500
51
  },
52
  {
53
+ "epoch": 0.54,
54
+ "learning_rate": 0.0004802953809057343,
55
+ "loss": 2.3175,
56
  "step": 4000
57
  },
58
  {
59
+ "epoch": 0.61,
60
+ "learning_rate": 0.0004708763469218597,
61
+ "loss": 2.3087,
 
 
 
 
 
 
 
 
 
 
 
 
 
62
  "step": 4500
63
  },
64
  {
65
+ "epoch": 0.68,
66
+ "learning_rate": 0.0004614573129379851,
67
+ "loss": 2.2763,
68
  "step": 5000
69
  },
70
  {
71
+ "epoch": 0.75,
72
+ "learning_rate": 0.0004520382789541105,
73
+ "loss": 2.2438,
 
 
 
 
 
 
 
 
 
 
 
 
 
74
  "step": 5500
75
  },
76
  {
77
+ "epoch": 0.81,
78
+ "learning_rate": 0.0004426192449702359,
79
+ "loss": 2.2358,
80
  "step": 6000
81
  },
82
  {
83
+ "epoch": 0.88,
84
+ "learning_rate": 0.000433219049054329,
85
+ "loss": 2.194,
86
+ "step": 6500
 
 
 
 
 
 
 
87
  },
88
  {
89
+ "epoch": 0.95,
90
+ "learning_rate": 0.0004238000150704544,
91
+ "loss": 2.1761,
92
+ "step": 7000
93
  },
94
  {
95
+ "epoch": 1.0,
96
+ "eval_gen_len": 16.7426,
97
+ "eval_loss": 2.006333589553833,
98
+ "eval_rouge1": 0.3422,
99
+ "eval_rouge2": 0.1605,
100
+ "eval_rougeL": 0.3139,
101
+ "eval_rougeLsum": 0.314,
102
+ "eval_runtime": 782.1975,
103
+ "eval_samples_per_second": 16.757,
104
+ "eval_steps_per_second": 2.095,
105
+ "step": 7373
106
+ },
107
+ {
108
+ "epoch": 1.02,
109
+ "learning_rate": 0.0004143809810865798,
110
+ "loss": 2.1455,
111
+ "step": 7500
112
  },
113
  {
114
+ "epoch": 1.09,
115
+ "learning_rate": 0.0004049619471027052,
116
+ "loss": 2.0777,
117
+ "step": 8000
118
  },
119
  {
120
+ "epoch": 1.15,
121
+ "learning_rate": 0.0003955429131188306,
122
+ "loss": 2.0945,
123
+ "step": 8500
124
  },
125
  {
126
+ "epoch": 1.22,
127
+ "learning_rate": 0.00038612387913495596,
128
+ "loss": 2.0485,
129
+ "step": 9000
 
 
 
 
 
 
 
130
  },
131
  {
132
+ "epoch": 1.29,
133
+ "learning_rate": 0.00037670484515108134,
134
+ "loss": 2.054,
135
+ "step": 9500
136
  },
137
  {
138
+ "epoch": 1.36,
139
+ "learning_rate": 0.0003672858111672067,
140
+ "loss": 2.073,
141
+ "step": 10000
142
  },
143
  {
144
+ "epoch": 1.42,
145
+ "learning_rate": 0.0003578667771833321,
146
+ "loss": 2.0396,
147
+ "step": 10500
 
 
 
 
 
 
 
148
  },
149
  {
150
+ "epoch": 1.49,
151
+ "learning_rate": 0.0003484665812674252,
152
+ "loss": 2.0387,
153
+ "step": 11000
154
  },
155
  {
156
+ "epoch": 1.56,
157
+ "learning_rate": 0.0003390475472835506,
158
+ "loss": 2.0198,
159
+ "step": 11500
160
  },
161
  {
162
+ "epoch": 1.63,
163
+ "learning_rate": 0.000329628513299676,
164
+ "loss": 2.0144,
165
+ "step": 12000
 
 
 
 
 
 
 
166
  },
167
  {
168
+ "epoch": 1.7,
169
+ "learning_rate": 0.00032020947931580136,
170
+ "loss": 2.0256,
171
+ "step": 12500
172
  },
173
  {
174
+ "epoch": 1.76,
175
+ "learning_rate": 0.0003108092833998945,
176
+ "loss": 2.0027,
177
+ "step": 13000
 
 
 
 
 
 
 
178
  },
179
  {
180
+ "epoch": 1.83,
181
+ "learning_rate": 0.0003013902494160199,
182
+ "loss": 2.0005,
183
+ "step": 13500
184
  },
185
  {
186
+ "epoch": 1.9,
187
+ "learning_rate": 0.0002919712154321453,
188
+ "loss": 1.9977,
189
+ "step": 14000
190
  },
191
  {
192
+ "epoch": 1.97,
193
+ "learning_rate": 0.0002825521814482707,
194
+ "loss": 1.9967,
195
+ "step": 14500
 
 
 
 
 
 
 
196
  },
197
  {
198
+ "epoch": 2.0,
199
+ "eval_gen_len": 16.758,
200
+ "eval_loss": 1.8912419080734253,
201
+ "eval_rouge1": 0.3607,
202
+ "eval_rouge2": 0.1759,
203
+ "eval_rougeL": 0.3322,
204
+ "eval_rougeLsum": 0.3321,
205
+ "eval_runtime": 782.0201,
206
+ "eval_samples_per_second": 16.76,
207
+ "eval_steps_per_second": 2.096,
208
+ "step": 14746
209
+ },
210
+ {
211
+ "epoch": 2.03,
212
+ "learning_rate": 0.0002731519855323638,
213
+ "loss": 1.9494,
214
+ "step": 15000
215
  },
216
  {
217
+ "epoch": 2.1,
218
+ "learning_rate": 0.0002637329515484892,
219
+ "loss": 1.8612,
220
+ "step": 15500
221
  },
222
  {
223
+ "epoch": 2.17,
224
+ "learning_rate": 0.0002543139175646146,
225
+ "loss": 1.9076,
226
+ "step": 16000
 
 
 
 
 
 
 
227
  },
228
  {
229
+ "epoch": 2.24,
230
+ "learning_rate": 0.00024489488358074,
231
+ "loss": 1.8987,
232
+ "step": 16500
233
  },
234
  {
235
+ "epoch": 2.31,
236
+ "learning_rate": 0.00023547584959686534,
237
+ "loss": 1.8882,
238
+ "step": 17000
 
 
 
 
 
 
 
239
  },
240
  {
241
+ "epoch": 2.37,
242
+ "learning_rate": 0.00022605681561299075,
243
+ "loss": 1.9032,
244
+ "step": 17500
245
  },
246
  {
247
+ "epoch": 2.44,
248
+ "learning_rate": 0.00021663778162911613,
249
+ "loss": 1.8853,
250
+ "step": 18000
251
  },
252
  {
253
+ "epoch": 2.51,
254
+ "learning_rate": 0.00020721874764524151,
255
+ "loss": 1.8841,
256
+ "step": 18500
 
 
 
 
 
 
 
257
  },
258
  {
259
+ "epoch": 2.58,
260
+ "learning_rate": 0.00019781855172933463,
261
+ "loss": 1.8747,
262
+ "step": 19000
263
  },
264
  {
265
+ "epoch": 2.64,
266
+ "learning_rate": 0.00018841835581342778,
267
+ "loss": 1.8842,
268
+ "step": 19500
269
  },
270
  {
271
+ "epoch": 2.71,
272
+ "learning_rate": 0.00017899932182955316,
273
+ "loss": 1.8856,
274
+ "step": 20000
 
 
 
 
 
 
 
275
  },
276
  {
277
+ "epoch": 2.78,
278
+ "learning_rate": 0.00016958028784567857,
279
+ "loss": 1.8751,
280
+ "step": 20500
281
  },
282
  {
283
+ "epoch": 2.85,
284
+ "learning_rate": 0.00016016125386180395,
285
+ "loss": 1.8806,
286
+ "step": 21000
287
  },
288
  {
289
+ "epoch": 2.92,
290
+ "learning_rate": 0.00015074221987792933,
291
+ "loss": 1.8519,
292
+ "step": 21500
 
 
 
 
 
 
 
293
  },
294
  {
295
+ "epoch": 2.98,
296
+ "learning_rate": 0.00014134202396202245,
297
+ "loss": 1.8716,
298
+ "step": 22000
299
+ },
300
+ {
301
+ "epoch": 3.0,
302
+ "eval_gen_len": 16.6685,
303
+ "eval_loss": 1.8340635299682617,
304
+ "eval_rouge1": 0.3656,
305
+ "eval_rouge2": 0.1782,
306
+ "eval_rougeL": 0.3354,
307
+ "eval_rougeLsum": 0.3355,
308
+ "eval_runtime": 781.3209,
309
+ "eval_samples_per_second": 16.775,
310
+ "eval_steps_per_second": 2.098,
311
+ "step": 22119
312
  },
313
  {
314
+ "epoch": 3.05,
315
+ "learning_rate": 0.00013192298997814783,
316
+ "loss": 1.8213,
317
+ "step": 22500
 
 
 
 
 
 
 
318
  },
319
  {
320
+ "epoch": 3.12,
321
+ "learning_rate": 0.00012250395599427324,
322
+ "loss": 1.78,
323
+ "step": 23000
324
  },
325
  {
326
+ "epoch": 3.19,
327
+ "learning_rate": 0.00011308492201039862,
328
+ "loss": 1.7891,
329
+ "step": 23500
330
+ },
331
+ {
332
+ "epoch": 3.26,
333
+ "learning_rate": 0.00010366588802652401,
334
+ "loss": 1.7992,
335
+ "step": 24000
336
+ },
337
+ {
338
+ "epoch": 3.32,
339
+ "learning_rate": 9.424685404264939e-05,
340
+ "loss": 1.8033,
341
+ "step": 24500
342
+ },
343
+ {
344
+ "epoch": 3.39,
345
+ "learning_rate": 8.482782005877477e-05,
346
+ "loss": 1.8072,
347
+ "step": 25000
348
+ },
349
+ {
350
+ "epoch": 3.46,
351
+ "learning_rate": 7.540878607490017e-05,
352
+ "loss": 1.7981,
353
+ "step": 25500
354
+ },
355
+ {
356
+ "epoch": 3.53,
357
+ "learning_rate": 6.60085901589933e-05,
358
+ "loss": 1.7978,
359
+ "step": 26000
360
+ },
361
+ {
362
+ "epoch": 3.59,
363
+ "learning_rate": 5.6589556175118686e-05,
364
+ "loss": 1.7927,
365
+ "step": 26500
366
+ },
367
+ {
368
+ "epoch": 3.66,
369
+ "learning_rate": 4.7170522191244066e-05,
370
+ "loss": 1.7735,
371
+ "step": 27000
372
+ },
373
+ {
374
+ "epoch": 3.73,
375
+ "learning_rate": 3.7751488207369454e-05,
376
+ "loss": 1.7863,
377
+ "step": 27500
378
+ },
379
+ {
380
+ "epoch": 3.8,
381
+ "learning_rate": 2.8332454223494838e-05,
382
+ "loss": 1.7965,
383
+ "step": 28000
384
+ },
385
+ {
386
+ "epoch": 3.87,
387
+ "learning_rate": 1.8932258307587973e-05,
388
+ "loss": 1.7722,
389
+ "step": 28500
390
+ },
391
+ {
392
+ "epoch": 3.93,
393
+ "learning_rate": 9.513224323713361e-06,
394
+ "loss": 1.7679,
395
+ "step": 29000
396
  }
397
  ],
398
+ "max_steps": 29492,
399
+ "num_train_epochs": 4,
400
+ "total_flos": 1.1856217832207155e+17,
401
  "trial_name": null,
402
  "trial_params": null
403
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:037777afe7f6a0e02734b8f27ee76168428c6a68b7c2296930ac277edcf3ea7b
3
  size 4091
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9327d8e3421496510caca2bb5661ec80a69fff8151109ade379e3c1754ab335d
3
  size 4091