dzanbek commited on
Commit
0c0ee12
1 Parent(s): 00d928a

Training in progress, step 28, checkpoint

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c9c1d6bdd755319f1f434c95c22ec7ff65d2136a02928e38d507f58dc50ed300
3
  size 49846260
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a2195117d41b17e4fe58cca08787f019143407a6dab60695c3c4ba2233a5692f
3
  size 49846260
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:af7e627a0f75e8e1695b923aa7f2e71fc850c47eaa8c8adc15535761677b7864
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7d476b0b3563b1fa3d4de8be22a666cbc835905a62f1f401aa3aa58a55379251
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:470670603e8fdc5330cdb9a9152c4fd9c3d8c5a74dd26bffbbb0d869d097eafa
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ff994fffd2fb6fe21545e6fbc55baa2a1474438a89b2d40605678f7de701427c
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.026030368763557483,
5
  "eval_steps": 4,
6
- "global_step": 24,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -231,6 +231,42 @@
231
  "eval_samples_per_second": 7.65,
232
  "eval_steps_per_second": 3.835,
233
  "step": 24
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
234
  }
235
  ],
236
  "logging_steps": 1,
@@ -250,7 +286,7 @@
250
  "attributes": {}
251
  }
252
  },
253
- "total_flos": 6707623030161408.0,
254
  "train_batch_size": 2,
255
  "trial_name": null,
256
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.03036876355748373,
5
  "eval_steps": 4,
6
+ "global_step": 28,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
231
  "eval_samples_per_second": 7.65,
232
  "eval_steps_per_second": 3.835,
233
  "step": 24
234
+ },
235
+ {
236
+ "epoch": 0.027114967462039046,
237
+ "grad_norm": NaN,
238
+ "learning_rate": 0.000138268343236509,
239
+ "loss": 0.0,
240
+ "step": 25
241
+ },
242
+ {
243
+ "epoch": 0.028199566160520606,
244
+ "grad_norm": NaN,
245
+ "learning_rate": 0.00013090169943749476,
246
+ "loss": 0.0,
247
+ "step": 26
248
+ },
249
+ {
250
+ "epoch": 0.02928416485900217,
251
+ "grad_norm": NaN,
252
+ "learning_rate": 0.00012334453638559057,
253
+ "loss": 0.0,
254
+ "step": 27
255
+ },
256
+ {
257
+ "epoch": 0.03036876355748373,
258
+ "grad_norm": NaN,
259
+ "learning_rate": 0.0001156434465040231,
260
+ "loss": 0.0,
261
+ "step": 28
262
+ },
263
+ {
264
+ "epoch": 0.03036876355748373,
265
+ "eval_loss": NaN,
266
+ "eval_runtime": 107.1539,
267
+ "eval_samples_per_second": 3.63,
268
+ "eval_steps_per_second": 1.82,
269
+ "step": 28
270
  }
271
  ],
272
  "logging_steps": 1,
 
286
  "attributes": {}
287
  }
288
  },
289
+ "total_flos": 7825560201854976.0,
290
  "train_batch_size": 2,
291
  "trial_name": null,
292
  "trial_params": null