ardaspear commited on
Commit
864e1dc
·
verified ·
1 Parent(s): e9ce324

Training in progress, step 400, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:81a3eea9d1fa6290220cf46b07fb62985e27093d257e77a48314f73c0764625f
3
  size 72396376
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:93fc1c475c67deccfc9fedf9532cd31c5a80821693d62383c3cc6240ae81119e
3
  size 72396376
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:153e2d49ba9ba2ccfbf3bfae08347b3f0f38d757bcccfe7fa79912f2e02a5be4
3
  size 37134740
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c72bfe6df38d31747971d88448110b98ec5ce45079ee99b0b2f20fbce0c4ed23
3
  size 37134740
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d92ff51f2aa2d47f51b12f0f0bb45e8de9788980d700a69cb0111b82a788b793
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c194e1392a42c2eda06336d0305e55c4adfb29a6e43a5dec7cc518e3e72b0450
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:51094b5d327949483be134a2a7ce82f120d34a302bf097e81122d94eff7cf8c6
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:64297a6969c9113e6582dde9428f08d78a5599aec9c2adf99caa5d81625685a6
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.1063406312197896,
5
  "eval_steps": 34,
6
- "global_step": 374,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -971,6 +971,69 @@
971
  "eval_samples_per_second": 35.237,
972
  "eval_steps_per_second": 4.408,
973
  "step": 374
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
974
  }
975
  ],
976
  "logging_steps": 3,
@@ -985,12 +1048,12 @@
985
  "should_evaluate": false,
986
  "should_log": false,
987
  "should_save": true,
988
- "should_training_stop": false
989
  },
990
  "attributes": {}
991
  }
992
  },
993
- "total_flos": 1.197680065857454e+17,
994
  "train_batch_size": 8,
995
  "trial_name": null,
996
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.11373329542223486,
5
  "eval_steps": 34,
6
+ "global_step": 400,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
971
  "eval_samples_per_second": 35.237,
972
  "eval_steps_per_second": 4.408,
973
  "step": 374
974
+ },
975
+ {
976
+ "epoch": 0.10662496445834518,
977
+ "grad_norm": 0.46795177459716797,
978
+ "learning_rate": 5.052336989433082e-07,
979
+ "loss": 0.4216,
980
+ "step": 375
981
+ },
982
+ {
983
+ "epoch": 0.10747796417401194,
984
+ "grad_norm": 0.560074508190155,
985
+ "learning_rate": 3.915515781850565e-07,
986
+ "loss": 0.4089,
987
+ "step": 378
988
+ },
989
+ {
990
+ "epoch": 0.10833096388967871,
991
+ "grad_norm": 0.5042891502380371,
992
+ "learning_rate": 2.922400983217416e-07,
993
+ "loss": 0.4484,
994
+ "step": 381
995
+ },
996
+ {
997
+ "epoch": 0.10918396360534546,
998
+ "grad_norm": 0.4858133792877197,
999
+ "learning_rate": 2.0735725446094923e-07,
1000
+ "loss": 0.4354,
1001
+ "step": 384
1002
+ },
1003
+ {
1004
+ "epoch": 0.11003696332101223,
1005
+ "grad_norm": 0.5573092103004456,
1006
+ "learning_rate": 1.3695261579316777e-07,
1007
+ "loss": 0.3903,
1008
+ "step": 387
1009
+ },
1010
+ {
1011
+ "epoch": 0.11088996303667899,
1012
+ "grad_norm": 0.4502072334289551,
1013
+ "learning_rate": 8.106729664475176e-08,
1014
+ "loss": 0.416,
1015
+ "step": 390
1016
+ },
1017
+ {
1018
+ "epoch": 0.11174296275234574,
1019
+ "grad_norm": 0.5383118987083435,
1020
+ "learning_rate": 3.9733932468333234e-08,
1021
+ "loss": 0.4726,
1022
+ "step": 393
1023
+ },
1024
+ {
1025
+ "epoch": 0.11259596246801251,
1026
+ "grad_norm": 0.6252465844154358,
1027
+ "learning_rate": 1.297666078462767e-08,
1028
+ "loss": 0.4435,
1029
+ "step": 396
1030
+ },
1031
+ {
1032
+ "epoch": 0.11344896218367927,
1033
+ "grad_norm": 0.5062450170516968,
1034
+ "learning_rate": 8.111070868010995e-10,
1035
+ "loss": 0.373,
1036
+ "step": 399
1037
  }
1038
  ],
1039
  "logging_steps": 3,
 
1048
  "should_evaluate": false,
1049
  "should_log": false,
1050
  "should_save": true,
1051
+ "should_training_stop": true
1052
  },
1053
  "attributes": {}
1054
  }
1055
  },
1056
+ "total_flos": 1.280941246906368e+17,
1057
  "train_batch_size": 8,
1058
  "trial_name": null,
1059
  "trial_params": null