Femboyuwu2000 commited on
Commit
44545b7
1 Parent(s): eb54688

Training in progress, step 800, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2df16ce8b561b352d3312652c40d477fbb877cd56ea970096f2dbaffa07fb552
3
  size 13982248
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a7b375ea6f5e70ef64e4641abb6a08df506c86a9a7bb439340b989e59fe64c0c
3
  size 13982248
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6b6d76fc568477d34da53d466b9db1022be4baaf30111fac5d0d9a5be4fdbc49
3
  size 7062522
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:033fa7ccae359fb38b200fbf2e73770b5a80ea7b2b9e5686853f4c0453109b9b
3
  size 7062522
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:66230a2c3a0a8fe730262d29372e7febdc116b90ed069ff519fa96f8d1b53e9a
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f17d400e8d0a3279d48c33167ab70e99da7e17a6d2f06962e7ee39cc165c3506
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3939cbd2421a0d3e4ed3c12b3cceaa472fcdb8fe6c9b60b2d2c772cdc28de91f
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2f0d32846dc851291d36978371f88c4a2e97295576d6f59868838a673ab4bcbf
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.0624,
5
  "eval_steps": 500,
6
- "global_step": 780,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -280,6 +280,13 @@
280
  "learning_rate": 2.999362805807425e-05,
281
  "loss": 3.7586,
282
  "step": 780
 
 
 
 
 
 
 
283
  }
284
  ],
285
  "logging_steps": 20,
@@ -287,7 +294,7 @@
287
  "num_input_tokens_seen": 0,
288
  "num_train_epochs": 2,
289
  "save_steps": 20,
290
- "total_flos": 1847858637668352.0,
291
  "train_batch_size": 8,
292
  "trial_name": null,
293
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.064,
5
  "eval_steps": 500,
6
+ "global_step": 800,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
280
  "learning_rate": 2.999362805807425e-05,
281
  "loss": 3.7586,
282
  "step": 780
283
+ },
284
+ {
285
+ "epoch": 0.06,
286
+ "grad_norm": 30.289432525634766,
287
+ "learning_rate": 2.9992133535682725e-05,
288
+ "loss": 3.6919,
289
+ "step": 800
290
  }
291
  ],
292
  "logging_steps": 20,
 
294
  "num_input_tokens_seen": 0,
295
  "num_train_epochs": 2,
296
  "save_steps": 20,
297
+ "total_flos": 1896312970543104.0,
298
  "train_batch_size": 8,
299
  "trial_name": null,
300
  "trial_params": null