dzanbek committed on
Commit
0a3e028
1 Parent(s): c567ece

Training in progress, step 25, checkpoint

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0dfea9e91529638011e2b2348c421b593487e3c8511b501b2034e4bfcd2358ee
3
  size 335922386
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:65374d1b18cf4991c2e64e6af0bab0b44d319f7cc5f345847f3f6a1e59cc6ae2
3
  size 335922386
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a3e1982295476e84d089e86abce749a0a269a426f32611cee726d775e4c4d567
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:180effa7aec2337df1049151bf77ddb81760b06782e9b60ff738d685254932e3
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9be43866b7a112efbf8125d7bbc11610819a4fb8c7f205bdfffb33dc32734ab8
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4521b8db9cc205e54aa606d85e707c024abd2d8ad4a20bec4b2cff365dc59cdf
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.0007514030103083101,
5
  "eval_steps": 2,
6
- "global_step": 24,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -279,6 +279,13 @@
279
  "eval_samples_per_second": 4.189,
280
  "eval_steps_per_second": 4.189,
281
  "step": 24
 
 
 
 
 
 
 
282
  }
283
  ],
284
  "logging_steps": 1,
@@ -293,12 +300,12 @@
293
  "should_evaluate": false,
294
  "should_log": false,
295
  "should_save": true,
296
- "should_training_stop": false
297
  },
298
  "attributes": {}
299
  }
300
  },
301
- "total_flos": 1.6875107968352256e+16,
302
  "train_batch_size": 1,
303
  "trial_name": null,
304
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.0007827114690711563,
5
  "eval_steps": 2,
6
+ "global_step": 25,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
279
  "eval_samples_per_second": 4.189,
280
  "eval_steps_per_second": 4.189,
281
  "step": 24
282
+ },
283
+ {
284
+ "epoch": 0.0007827114690711563,
285
+ "grad_norm": NaN,
286
+ "learning_rate": 0.0,
287
+ "loss": 0.0,
288
+ "step": 25
289
  }
290
  ],
291
  "logging_steps": 1,
 
300
  "should_evaluate": false,
301
  "should_log": false,
302
  "should_save": true,
303
+ "should_training_stop": true
304
  },
305
  "attributes": {}
306
  }
307
  },
308
+ "total_flos": 1.75782374670336e+16,
309
  "train_batch_size": 1,
310
  "trial_name": null,
311
  "trial_params": null