leixa committed on
Commit 8c1c4b6 · verified · 1 Parent(s): a34e3ff

Training in progress, step 90, checkpoint

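This commit advances the `last-checkpoint` directory from step 75 to step 90. A quick way to confirm what state such a checkpoint is at is to read `trainer_state.json` directly; a minimal sketch, assuming paths relative to a local clone of the repository:

```python
import json

# Path assumed relative to a local clone of the repository.
with open("last-checkpoint/trainer_state.json") as f:
    state = json.load(f)

print("epoch:", state["epoch"])              # 1.5627705627705628 after this commit
print("global_step:", state["global_step"])  # 90 after this commit
print("last log entry:", state["log_history"][-1])  # the eval record at step 90
```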
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1498e68997d7efa80748284328828a3bb2095fd364da94d35c6ca3f4dea61555
+oid sha256:1a166fd1375d531101b7fe544fadf6ca75cb91e0259b33cdd526e64d4a2ffc1b
 size 1001465824
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3f98f37933ae8e652c4d6bcea0f8ff8a8c6e3287d7990af1d665e7c9d3a2a080
+oid sha256:daac49d5d7c56d84fb7f9ba7c9dee83610088e5c7a7ae5db279cfd5b1e0f0781
 size 509176980
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a91207079a4d06a2e0343a805bd4731bdce1f224f8753275a8c1594d4ff5d7d0
+oid sha256:2c595568d5a7bb7b9b57941fb29c461e3dc17e26eb6b784f3922a9394cd85bee
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ba2da34f28903361f0dc730c5c7c178c98918af7f2f4dbd9a9718ee70beb337c
+oid sha256:31c70338869ffec5aa6e537de1ea64302e45bfe61a54ec17491a7c787e89c12b
 size 1064
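Each of the binary files above is stored with Git LFS, so the diff only shows the pointer text: a spec version, the object's SHA-256 (`oid`), and its byte size. A minimal sketch of checking a locally downloaded object against its pointer, assuming hypothetical paths for the pointer text and the resolved file (e.g. after `git lfs pull`):

```python
import hashlib

pointer_path = "scheduler.pt.pointer"        # assumed: pointer text saved as shown in the diff
object_path = "last-checkpoint/scheduler.pt" # assumed: the fully downloaded LFS object

# Parse the three "key value" lines of the pointer file.
fields = dict(
    line.split(" ", 1) for line in open(pointer_path).read().splitlines() if line
)
expected_oid = fields["oid"].removeprefix("sha256:")
expected_size = int(fields["size"])

# Hash the object in chunks and track its size.
sha = hashlib.sha256()
size = 0
with open(object_path, "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):
        sha.update(chunk)
        size += len(chunk)

assert size == expected_size, "size mismatch"
assert sha.hexdigest() == expected_oid, "sha256 mismatch"
print("pointer matches object")
```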
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 1.303030303030303,
+  "epoch": 1.5627705627705628,
   "eval_steps": 15,
-  "global_step": 75,
+  "global_step": 90,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -230,6 +230,49 @@
       "eval_samples_per_second": 4.432,
       "eval_steps_per_second": 0.594,
       "step": 75
+    },
+    {
+      "epoch": 1.354978354978355,
+      "grad_norm": 0.8432238101959229,
+      "learning_rate": 6.286336541589224e-05,
+      "loss": 2.2449,
+      "step": 78
+    },
+    {
+      "epoch": 1.406926406926407,
+      "grad_norm": 0.9027056097984314,
+      "learning_rate": 6.004970048339226e-05,
+      "loss": 2.2785,
+      "step": 81
+    },
+    {
+      "epoch": 1.4588744588744589,
+      "grad_norm": 0.977626621723175,
+      "learning_rate": 5.7202446389173223e-05,
+      "loss": 2.1978,
+      "step": 84
+    },
+    {
+      "epoch": 1.5108225108225108,
+      "grad_norm": 1.0718011856079102,
+      "learning_rate": 5.433111952413495e-05,
+      "loss": 2.1774,
+      "step": 87
+    },
+    {
+      "epoch": 1.5627705627705628,
+      "grad_norm": 1.034270167350769,
+      "learning_rate": 5.144531673771363e-05,
+      "loss": 2.1726,
+      "step": 90
+    },
+    {
+      "epoch": 1.5627705627705628,
+      "eval_loss": 0.6731178164482117,
+      "eval_runtime": 21.8631,
+      "eval_samples_per_second": 4.437,
+      "eval_steps_per_second": 0.595,
+      "step": 90
     }
   ],
   "logging_steps": 3,
@@ -249,7 +292,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 3.86404668407808e+17,
+  "total_flos": 4.636856020893696e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null