Femboyuwu2000 committed
Commit
0c4f8cd
1 Parent(s): ee08e56

Training in progress, step 960, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:ca9ff2623ebc59ce1f05948fb7f333347562f31720ea83bd9c654d07132bc456
+ oid sha256:cd6935101c95c7a3fcf35393fcef907ca744d140d002e80f3e1ab8bc68740781
  size 13982248
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:5979893651a6ad117a2ca79acc918df76f51b5615316af74b15fc3790d5db672
+ oid sha256:efbc01513003cd4778a5d7c40f35733a97acc954ec9ed9a205fe5bb1048cad2d
  size 7062522
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:175dfe02b8f41adb5d4468faa982ed5a51b1c1cd012483cfc61f70c9241471b4
+ oid sha256:a60dce02b424b527030edce82260ee74eebf81dd57466a5e2b83b22d49a22b4a
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:6f1c7c4b3a6117ae282f791966d823034e6914b8063785569ba52a491b2ee826
+ oid sha256:b961e5bd3f5ce2b76de5850c52ad545f91b84e33c7fc906eadd1fa38681efcd4
  size 1064
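
Each of the binary checkpoint files above is stored as a Git LFS pointer: three text lines recording the spec version, the SHA-256 object id, and the byte size of the actual payload. As a rough illustration only (the local path is hypothetical; the expected values are copied from the new adapter_model.safetensors pointer in this commit), a downloaded file can be checked against its pointer like this:

    import hashlib
    import os

    # Hypothetical local copy of one of the checkpoint files above.
    path = "last-checkpoint/adapter_model.safetensors"

    # Expected values taken from the new LFS pointer in this commit.
    expected_oid = "cd6935101c95c7a3fcf35393fcef907ca744d140d002e80f3e1ab8bc68740781"
    expected_size = 13982248

    # Hash the file in chunks so large checkpoints need not fit in memory.
    sha256 = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            sha256.update(chunk)

    print("size ok:", os.path.getsize(path) == expected_size)
    print("oid ok: ", sha256.hexdigest() == expected_oid)
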
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
  {
    "best_metric": null,
    "best_model_checkpoint": null,
-   "epoch": 0.0752,
+   "epoch": 0.0768,
    "eval_steps": 500,
-   "global_step": 940,
+   "global_step": 960,
    "is_hyper_param_search": false,
    "is_local_process_zero": true,
    "is_world_process_zero": true,
@@ -336,6 +336,13 @@
        "learning_rate": 2.9977269673842554e-05,
        "loss": 3.6172,
        "step": 940
+     },
+     {
+       "epoch": 0.08,
+       "grad_norm": 58.2718620300293,
+       "learning_rate": 2.997451764584951e-05,
+       "loss": 3.7494,
+       "step": 960
      }
    ],
    "logging_steps": 20,
@@ -343,7 +350,7 @@
    "num_input_tokens_seen": 0,
    "num_train_epochs": 2,
    "save_steps": 20,
-   "total_flos": 2230933278818304.0,
+   "total_flos": 2278337822490624.0,
    "train_batch_size": 8,
    "trial_name": null,
    "trial_params": null