Femboyuwu2000 committed (verified)
Commit dfe4fe5 · 1 Parent(s): 0b0a20e

Training in progress, step 780, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:dcf8cdd4fa216bd4c7f8e742e37dbe5d3cf9b6677692371f47c01ee2359a092f
+oid sha256:2df16ce8b561b352d3312652c40d477fbb877cd56ea970096f2dbaffa07fb552
 size 13982248
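Each checkpoint file in this commit is tracked with Git LFS, so the diff only rewrites the small pointer file: the sha256 oid changes while the recorded byte size stays the same. Below is a minimal sketch (not part of the commit) of checking a locally downloaded adapter_model.safetensors against the pointer above; the local path is an assumption.

import hashlib
from pathlib import Path

def sha256_of(path: Path, chunk_size: int = 1 << 20) -> str:
    # Stream the file so large checkpoints never need to fit in memory.
    h = hashlib.sha256()
    with path.open("rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            h.update(chunk)
    return h.hexdigest()

# Hypothetical local copy of the checkpoint; oid and size are taken from the pointer diff above.
blob = Path("last-checkpoint/adapter_model.safetensors")
expected_oid = "2df16ce8b561b352d3312652c40d477fbb877cd56ea970096f2dbaffa07fb552"
expected_size = 13982248

assert blob.stat().st_size == expected_size, "size mismatch vs. LFS pointer"
assert sha256_of(blob) == expected_oid, "sha256 mismatch vs. LFS pointer"
print("adapter_model.safetensors matches its LFS pointer")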
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:851c1edd126e12df0d623062321c6609c8b64c6756e2eeeb1490e4cea8239ad7
+oid sha256:6b6d76fc568477d34da53d466b9db1022be4baaf30111fac5d0d9a5be4fdbc49
 size 7062522
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:140fb37fd9e85fa1ebc4cf0cf5b0a29ddff69c0f41174cb65d6a4f64fdb5b957
+oid sha256:66230a2c3a0a8fe730262d29372e7febdc116b90ed069ff519fa96f8d1b53e9a
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2e0670aecb01576e1c66fe6495444d1847e93027019e1c292c2aa930fb3fa0e6
+oid sha256:3939cbd2421a0d3e4ed3c12b3cceaa472fcdb8fe6c9b60b2d2c772cdc28de91f
 size 1064
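optimizer.pt, scheduler.pt and rng_state.pth hold the optimizer moments, LR-scheduler state and RNG snapshots that let training continue from step 780 instead of restarting; together with the adapter weights and trainer_state.json they are what transformers' Trainer.train(resume_from_checkpoint=...) consumes. A rough sketch for inspecting them locally, assuming the checkpoint directory has been downloaded (the paths and the weights_only choice are assumptions; pickled checkpoints should only be loaded from sources you trust):

import torch

# Ordinary torch.save() payloads; map_location="cpu" avoids needing a GPU.
# weights_only=False because optimizer and RNG states contain pickled Python objects.
opt_state = torch.load("last-checkpoint/optimizer.pt", map_location="cpu", weights_only=False)
sched_state = torch.load("last-checkpoint/scheduler.pt", map_location="cpu", weights_only=False)
rng_state = torch.load("last-checkpoint/rng_state.pth", map_location="cpu", weights_only=False)

print(list(opt_state.keys()))   # typically 'state' and 'param_groups' for a torch optimizer
print(sched_state)              # small dict (last LR, step count), hence the ~1 KB file size
print(list(rng_state.keys()))   # typically Python/NumPy/CPU (and CUDA) RNG states for deterministic resume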
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.0608,
+  "epoch": 0.0624,
   "eval_steps": 500,
-  "global_step": 760,
+  "global_step": 780,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -273,6 +273,13 @@
       "learning_rate": 2.9994965304402304e-05,
       "loss": 3.7613,
       "step": 760
+    },
+    {
+      "epoch": 0.06,
+      "grad_norm": 38.32442855834961,
+      "learning_rate": 2.999362805807425e-05,
+      "loss": 3.7586,
+      "step": 780
     }
   ],
   "logging_steps": 20,
@@ -280,7 +287,7 @@
   "num_input_tokens_seen": 0,
   "num_train_epochs": 2,
   "save_steps": 20,
-  "total_flos": 1808852407615488.0,
+  "total_flos": 1847858637668352.0,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null