leixa committed · verified
Commit 434bfea · 1 Parent(s): 05f8211

Training in progress, step 124, checkpoint
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:caeddd11e4b9cc9d83e11d9ec3987e94c5436376e83a9998ce3068e458397d51
+oid sha256:672b05673b812e5e9c2b96e7fc581c47c8bc45fe637a2c64823fa5c26313a4d1
 size 639691872
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:705b00d71daa4142253c16c1bebea91dabaa93d2b6338a801dde26f42519f2dc
+oid sha256:fcd0849778cac2f59c2171424c9084ba127563d2e4a8b6176002dcd78d4fcb10
 size 325339796
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:67ac7f1c5347e279b4947122f9a4f1ea87fde404d3dbf35cb5a047b4265298f9
+oid sha256:e848e9332425e8f19037f20611574d35f1552d094f74408669d2f10122fbcb5d
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:96bfc2360018b511a262b59e99fcddde6a9bd8f019814a20e26529e523d531e5
+oid sha256:3ccd887015d7faf1da47941525d069c3cb7c1208d0267f6e00e06c9dcf3b23ab
 size 1064
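The four changes above only replace Git LFS pointer files: each pointer records the SHA-256 oid and byte size of the real checkpoint artifact. A minimal sketch of how one could check a locally pulled file against the new oid from this commit (the local path is an assumption about where the repository was cloned):

```python
import hashlib

def sha256_of(path: str, chunk_size: int = 1 << 20) -> str:
    """Stream the file in chunks and return its hex SHA-256 digest."""
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            h.update(chunk)
    return h.hexdigest()

# Hypothetical local path; assumes `git lfs pull` has replaced the pointer
# with the actual 639691872-byte safetensors file.
digest = sha256_of("last-checkpoint/adapter_model.safetensors")

# oid recorded by the new pointer in this commit:
assert digest == "672b05673b812e5e9c2b96e7fc581c47c8bc45fe637a2c64823fa5c26313a4d1"
```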
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.7545638945233266,
+  "epoch": 1.0060851926977687,
   "eval_steps": 31,
-  "global_step": 93,
+  "global_step": 124,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -256,6 +256,84 @@
       "eval_samples_per_second": 15.452,
       "eval_steps_per_second": 1.931,
       "step": 93
+    },
+    {
+      "epoch": 0.7789046653144016,
+      "grad_norm": 4.811842918395996,
+      "learning_rate": 8.656768508095853e-05,
+      "loss": 2.2144,
+      "step": 96
+    },
+    {
+      "epoch": 0.8032454361054767,
+      "grad_norm": 4.506133556365967,
+      "learning_rate": 8.566252245770909e-05,
+      "loss": 2.0727,
+      "step": 99
+    },
+    {
+      "epoch": 0.8275862068965517,
+      "grad_norm": 3.898118734359741,
+      "learning_rate": 8.473291852294987e-05,
+      "loss": 1.9802,
+      "step": 102
+    },
+    {
+      "epoch": 0.8519269776876268,
+      "grad_norm": 4.056739807128906,
+      "learning_rate": 8.377951038078302e-05,
+      "loss": 2.034,
+      "step": 105
+    },
+    {
+      "epoch": 0.8762677484787018,
+      "grad_norm": 4.857217788696289,
+      "learning_rate": 8.280295144952536e-05,
+      "loss": 1.9623,
+      "step": 108
+    },
+    {
+      "epoch": 0.9006085192697769,
+      "grad_norm": 4.711864948272705,
+      "learning_rate": 8.18039110138882e-05,
+      "loss": 2.3024,
+      "step": 111
+    },
+    {
+      "epoch": 0.9249492900608519,
+      "grad_norm": 4.590112686157227,
+      "learning_rate": 8.07830737662829e-05,
+      "loss": 1.9634,
+      "step": 114
+    },
+    {
+      "epoch": 0.949290060851927,
+      "grad_norm": 6.305273056030273,
+      "learning_rate": 7.974113933756707e-05,
+      "loss": 2.0567,
+      "step": 117
+    },
+    {
+      "epoch": 0.973630831643002,
+      "grad_norm": 3.979123115539551,
+      "learning_rate": 7.86788218175523e-05,
+      "loss": 2.024,
+      "step": 120
+    },
+    {
+      "epoch": 0.9979716024340771,
+      "grad_norm": 3.4800965785980225,
+      "learning_rate": 7.75968492656029e-05,
+      "loss": 2.2663,
+      "step": 123
+    },
+    {
+      "epoch": 1.0060851926977687,
+      "eval_loss": 0.5130282640457153,
+      "eval_runtime": 13.4323,
+      "eval_samples_per_second": 15.485,
+      "eval_steps_per_second": 1.936,
+      "step": 124
     }
   ],
   "logging_steps": 3,
@@ -275,7 +353,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.2373610700629606e+17,
+  "total_flos": 1.646904300577751e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null