leixa committed (verified)
Commit d8686d2 · 1 Parent(s): 732643f

Training in progress, step 105, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:31950ae9617f892cd1a2f1b3497a6c89d63f13022f3b417331eb374c2d0a0aa6
+oid sha256:ff8ab265d07c6eecf718607b5120d82a9dd48aa873031f384d6025f0af085749
 size 191968
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9882a0339d70f771ed47074c510984676dd1f90485341b9d71c79a6578d4a801
+oid sha256:6309403c2680202e09f8fd0c2a08f99dfe3acbfe19d3d0778c0665f7f005aed6
 size 253144
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:611585759af06adf1fef356a0b29f640225adbdcc1a21f2c2557f30972d4755b
+oid sha256:a5b1aae0830d1010a88ca597c0f1ed03901b00f32510cacc8649169e36f65fbe
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7c082198971fde7d345d487d7f9a561afc194de17619b9de850d57aaedab580c
+oid sha256:2ecf5d3685ba2ba738f94f0ff54d87b59bd9a5e0c4c32dcfd75219f10311d69a
 size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 1.0181268882175227,
+  "epoch": 1.2719033232628398,
   "eval_steps": 21,
-  "global_step": 84,
+  "global_step": 105,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -243,6 +243,63 @@
       "eval_samples_per_second": 537.448,
       "eval_steps_per_second": 69.1,
       "step": 84
+    },
+    {
+      "epoch": 1.054380664652568,
+      "grad_norm": 0.24083495140075684,
+      "learning_rate": 7.650215591292888e-05,
+      "loss": 10.8142,
+      "step": 87
+    },
+    {
+      "epoch": 1.0906344410876132,
+      "grad_norm": 0.20766647160053253,
+      "learning_rate": 7.481003266247744e-05,
+      "loss": 10.0981,
+      "step": 90
+    },
+    {
+      "epoch": 1.1268882175226587,
+      "grad_norm": 0.19924764335155487,
+      "learning_rate": 7.307933338397667e-05,
+      "loss": 10.1149,
+      "step": 93
+    },
+    {
+      "epoch": 1.163141993957704,
+      "grad_norm": 0.2571873664855957,
+      "learning_rate": 7.131274906557725e-05,
+      "loss": 10.134,
+      "step": 96
+    },
+    {
+      "epoch": 1.1993957703927491,
+      "grad_norm": 0.20171616971492767,
+      "learning_rate": 6.95130264914993e-05,
+      "loss": 10.2961,
+      "step": 99
+    },
+    {
+      "epoch": 1.2356495468277946,
+      "grad_norm": 0.2096317708492279,
+      "learning_rate": 6.768296397117848e-05,
+      "loss": 10.2312,
+      "step": 102
+    },
+    {
+      "epoch": 1.2719033232628398,
+      "grad_norm": 0.28320643305778503,
+      "learning_rate": 6.582540698829781e-05,
+      "loss": 10.2853,
+      "step": 105
+    },
+    {
+      "epoch": 1.2719033232628398,
+      "eval_loss": 10.228970527648926,
+      "eval_runtime": 0.2716,
+      "eval_samples_per_second": 515.557,
+      "eval_steps_per_second": 66.286,
+      "step": 105
     }
   ],
   "logging_steps": 3,
@@ -262,7 +319,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 9370255491072.0,
+  "total_flos": 11712819363840.0,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null