Femboyuwu2000 committed on
Commit 98d40f6
1 Parent(s): 4859993

Training in progress, step 9060, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:ba139240646f433f9f0106d855aea9e350f07f031693b316743dd388f006a45a
+ oid sha256:b0bac75f5d24a596f406e46fc168b931d808d6267d31438980869f07fbf3a128
  size 13982248
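
The changed files in this commit are Git LFS pointers, so each diff only swaps the sha256 oid while the size stays fixed (13,982,248 bytes here); the binaries themselves live in LFS storage. A minimal verification sketch in Python, where the path and expected digest come from this diff and everything else is an assumption:

import hashlib

def sha256_of(path: str, chunk_size: int = 1 << 20) -> str:
    # Stream the file so large checkpoints never need to fit in memory.
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()

# Expected oid taken from the new pointer above.
expected = "b0bac75f5d24a596f406e46fc168b931d808d6267d31438980869f07fbf3a128"
print(sha256_of("last-checkpoint/adapter_model.safetensors") == expected)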
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:cd6c424fd81f222bd0b9ad0388c7d2dd3efa61cf565f0946a945da841c18c43f
+ oid sha256:e0b8471d164ccb5b5a89e3785c2f86274dfc686377b29ef93b82e37a46816e91
  size 7062522
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:1ac19e6550ed28e30f1e0773f61b5a93cb672dde550a8914d4067b84caffee97
+ oid sha256:8161b0139ec703aca722250be53f42af04367d09585f2a6f1c4cab858b114415
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:3bb2ad4299490e0d014e4083d00933053d6f0c483383ad6b71adbd0a8a72b6c2
+ oid sha256:34103ee724ff7cce18f877e6016120f3e8e183a8a04d54ad5db73b9ab0f1e98e
  size 1064
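
Taken together, adapter_model.safetensors (by its name and ~14 MB size, presumably a PEFT adapter), optimizer.pt, scheduler.pt and rng_state.pth are what the Transformers Trainer uses to resume this run exactly where it stopped, e.g. trainer.train(resume_from_checkpoint="last-checkpoint"). A small inspection sketch, assuming the repo was cloned with git-lfs so the real binaries (not the pointers shown here) are on disk:

import torch
from safetensors.torch import load_file

# Adapter weights: a flat mapping from tensor names to tensors.
adapter = load_file("last-checkpoint/adapter_model.safetensors")
print(len(adapter), "tensors,",
      sum(t.numel() for t in adapter.values()), "parameters")

# Optimizer, LR-scheduler and RNG snapshots are ordinary torch pickles.
for name in ("optimizer.pt", "scheduler.pt", "rng_state.pth"):
    obj = torch.load(f"last-checkpoint/{name}", map_location="cpu",
                     weights_only=False)
    print(name, type(obj).__name__,
          list(obj)[:5] if isinstance(obj, dict) else "")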
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
  {
  "best_metric": null,
  "best_model_checkpoint": null,
- "epoch": 0.7232,
+ "epoch": 0.7248,
  "eval_steps": 500,
- "global_step": 9040,
+ "global_step": 9060,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -3171,6 +3171,13 @@
  "learning_rate": 1.8039430793163753e-05,
  "loss": 3.5014,
  "step": 9040
+ },
+ {
+ "epoch": 0.72,
+ "grad_norm": 21.284992218017578,
+ "learning_rate": 1.7991841394296962e-05,
+ "loss": 3.4575,
+ "step": 9060
  }
  ],
  "logging_steps": 20,
@@ -3178,7 +3185,7 @@
  "num_input_tokens_seen": 0,
  "num_train_epochs": 2,
  "save_steps": 20,
- "total_flos": 2.1373183266914304e+16,
+ "total_flos": 2.1428297200041984e+16,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null