GlycerinLOL committed on
Commit
8968593
1 Parent(s): 5bcd218

End of training

Browse files
Files changed (3) hide show
  1. all_results.json +5 -5
  2. train_results.json +5 -5
  3. trainer_state.json +106 -24
all_results.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
- "epoch": 11.99,
3
- "train_loss": 0.6240079896062867,
4
- "train_runtime": 9692.0216,
5
- "train_samples_per_second": 61.907,
6
- "train_steps_per_second": 0.483
7
  }
 
1
  {
2
+ "epoch": 15.99,
3
+ "train_loss": 0.4757408973498222,
4
+ "train_runtime": 9951.3836,
5
+ "train_samples_per_second": 80.391,
6
+ "train_steps_per_second": 0.627
7
  }
train_results.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
- "epoch": 11.99,
3
- "train_loss": 0.6240079896062867,
4
- "train_runtime": 9692.0216,
5
- "train_samples_per_second": 61.907,
6
- "train_steps_per_second": 0.483
7
  }
 
1
  {
2
+ "epoch": 15.99,
3
+ "train_loss": 0.4757408973498222,
4
+ "train_runtime": 9951.3836,
5
+ "train_samples_per_second": 80.391,
6
+ "train_steps_per_second": 0.627
7
  }
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 11.990403071017274,
5
  "eval_steps": 500,
6
- "global_step": 4680,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -239,37 +239,119 @@
239
  "step": 4500
240
  },
241
  {
242
- "epoch": 11.99,
243
  "eval_f1": 0.9092,
244
- "eval_gen_len": 26.54581818181818,
245
- "eval_loss": 1.654082179069519,
246
- "eval_precision": 0.9101,
247
- "eval_recall": 0.9085,
248
- "eval_rouge1": 0.4665,
249
- "eval_rouge2": 0.2182,
250
- "eval_rougeL": 0.3824,
251
- "eval_rougeLsum": 0.3824,
252
- "eval_runtime": 506.4111,
253
- "eval_samples_per_second": 5.43,
254
- "eval_steps_per_second": 0.34,
255
  "step": 4680
256
  },
257
  {
258
- "epoch": 11.99,
259
- "step": 4680,
260
- "total_flos": 8.651981084751299e+17,
261
- "train_loss": 0.6240079896062867,
262
- "train_runtime": 9692.0216,
263
- "train_samples_per_second": 61.907,
264
- "train_steps_per_second": 0.483
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
265
  }
266
  ],
267
  "logging_steps": 500,
268
- "max_steps": 4680,
269
  "num_input_tokens_seen": 0,
270
- "num_train_epochs": 12,
271
  "save_steps": 500,
272
- "total_flos": 8.651981084751299e+17,
273
  "train_batch_size": 32,
274
  "trial_name": null,
275
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 15.990403071017274,
5
  "eval_steps": 500,
6
+ "global_step": 6240,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
239
  "step": 4500
240
  },
241
  {
242
+ "epoch": 12.0,
243
  "eval_f1": 0.9092,
244
+ "eval_gen_len": 26.439272727272726,
245
+ "eval_loss": 1.6508440971374512,
246
+ "eval_precision": 0.9103,
247
+ "eval_recall": 0.9084,
248
+ "eval_rouge1": 0.4668,
249
+ "eval_rouge2": 0.2175,
250
+ "eval_rougeL": 0.3823,
251
+ "eval_rougeLsum": 0.3822,
252
+ "eval_runtime": 506.6299,
253
+ "eval_samples_per_second": 5.428,
254
+ "eval_steps_per_second": 0.339,
255
  "step": 4680
256
  },
257
  {
258
+ "epoch": 12.82,
259
+ "learning_rate": 3.974358974358974e-06,
260
+ "loss": 1.7165,
261
+ "step": 5000
262
+ },
263
+ {
264
+ "epoch": 13.0,
265
+ "eval_f1": 0.9094,
266
+ "eval_gen_len": 26.638545454545454,
267
+ "eval_loss": 1.6451035737991333,
268
+ "eval_precision": 0.9103,
269
+ "eval_recall": 0.9089,
270
+ "eval_rouge1": 0.4687,
271
+ "eval_rouge2": 0.2191,
272
+ "eval_rougeL": 0.3834,
273
+ "eval_rougeLsum": 0.3834,
274
+ "eval_runtime": 509.0115,
275
+ "eval_samples_per_second": 5.403,
276
+ "eval_steps_per_second": 0.338,
277
+ "step": 5071
278
+ },
279
+ {
280
+ "epoch": 14.0,
281
+ "eval_f1": 0.9095,
282
+ "eval_gen_len": 26.415636363636363,
283
+ "eval_loss": 1.6405242681503296,
284
+ "eval_precision": 0.9106,
285
+ "eval_recall": 0.9087,
286
+ "eval_rouge1": 0.4691,
287
+ "eval_rouge2": 0.2193,
288
+ "eval_rougeL": 0.3845,
289
+ "eval_rougeLsum": 0.3844,
290
+ "eval_runtime": 502.7756,
291
+ "eval_samples_per_second": 5.47,
292
+ "eval_steps_per_second": 0.342,
293
+ "step": 5462
294
+ },
295
+ {
296
+ "epoch": 14.1,
297
+ "learning_rate": 2.371794871794872e-06,
298
+ "loss": 1.7068,
299
+ "step": 5500
300
+ },
301
+ {
302
+ "epoch": 15.0,
303
+ "eval_f1": 0.9097,
304
+ "eval_gen_len": 26.45709090909091,
305
+ "eval_loss": 1.6383482217788696,
306
+ "eval_precision": 0.9108,
307
+ "eval_recall": 0.9089,
308
+ "eval_rouge1": 0.4699,
309
+ "eval_rouge2": 0.2204,
310
+ "eval_rougeL": 0.3853,
311
+ "eval_rougeLsum": 0.3853,
312
+ "eval_runtime": 506.608,
313
+ "eval_samples_per_second": 5.428,
314
+ "eval_steps_per_second": 0.34,
315
+ "step": 5853
316
+ },
317
+ {
318
+ "epoch": 15.38,
319
+ "learning_rate": 7.692307692307694e-07,
320
+ "loss": 1.7004,
321
+ "step": 6000
322
+ },
323
+ {
324
+ "epoch": 15.99,
325
+ "eval_f1": 0.9096,
326
+ "eval_gen_len": 26.52509090909091,
327
+ "eval_loss": 1.6378074884414673,
328
+ "eval_precision": 0.9107,
329
+ "eval_recall": 0.909,
330
+ "eval_rouge1": 0.4698,
331
+ "eval_rouge2": 0.2197,
332
+ "eval_rougeL": 0.385,
333
+ "eval_rougeLsum": 0.3849,
334
+ "eval_runtime": 501.725,
335
+ "eval_samples_per_second": 5.481,
336
+ "eval_steps_per_second": 0.343,
337
+ "step": 6240
338
+ },
339
+ {
340
+ "epoch": 15.99,
341
+ "step": 6240,
342
+ "total_flos": 1.153589772728402e+18,
343
+ "train_loss": 0.4757408973498222,
344
+ "train_runtime": 9951.3836,
345
+ "train_samples_per_second": 80.391,
346
+ "train_steps_per_second": 0.627
347
  }
348
  ],
349
  "logging_steps": 500,
350
+ "max_steps": 6240,
351
  "num_input_tokens_seen": 0,
352
+ "num_train_epochs": 16,
353
  "save_steps": 500,
354
+ "total_flos": 1.153589772728402e+18,
355
  "train_batch_size": 32,
356
  "trial_name": null,
357
  "trial_params": null