ardaspear committed on
Commit
0c3cdfd
·
verified ·
1 Parent(s): 698b2fc

Training in progress, step 374, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b35927f06b80d26e3132911d1799d75251e7e6f71fcf0af72ef64fef8170870a
3
  size 72396376
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:81a3eea9d1fa6290220cf46b07fb62985e27093d257e77a48314f73c0764625f
3
  size 72396376
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dc56f5223c6ddbd7f71b1177c33bd87cfc42ca847c24b1ed458640671a0fbb09
3
  size 37134740
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:153e2d49ba9ba2ccfbf3bfae08347b3f0f38d757bcccfe7fa79912f2e02a5be4
3
  size 37134740
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e070c1a19e2f7fe8d8783d14cf6c4980c084d0861be459da7f60717ee83ba20f
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d92ff51f2aa2d47f51b12f0f0bb45e8de9788980d700a69cb0111b82a788b793
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e6999f9aad8d44fbf7db1d80d56ad86630abb8e28a7187e80ed24f8546462146
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:51094b5d327949483be134a2a7ce82f120d34a302bf097e81122d94eff7cf8c6
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.09667330110889963,
5
  "eval_steps": 34,
6
- "global_step": 340,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -886,6 +886,91 @@
886
  "eval_samples_per_second": 35.267,
887
  "eval_steps_per_second": 4.411,
888
  "step": 340
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
889
  }
890
  ],
891
  "logging_steps": 3,
@@ -905,7 +990,7 @@
905
  "attributes": {}
906
  }
907
  },
908
- "total_flos": 1.0888000598704128e+17,
909
  "train_batch_size": 8,
910
  "trial_name": null,
911
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.1063406312197896,
5
  "eval_steps": 34,
6
+ "global_step": 374,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
886
  "eval_samples_per_second": 35.267,
887
  "eval_steps_per_second": 4.411,
888
  "step": 340
889
+ },
890
+ {
891
+ "epoch": 0.09724196758601081,
892
+ "grad_norm": 0.5438792705535889,
893
+ "learning_rate": 2.679304450853401e-06,
894
+ "loss": 0.4406,
895
+ "step": 342
896
+ },
897
+ {
898
+ "epoch": 0.09809496730167756,
899
+ "grad_norm": 0.6174736022949219,
900
+ "learning_rate": 2.4137391347404476e-06,
901
+ "loss": 0.4503,
902
+ "step": 345
903
+ },
904
+ {
905
+ "epoch": 0.09894796701734433,
906
+ "grad_norm": 0.5348644256591797,
907
+ "learning_rate": 2.1613635589349756e-06,
908
+ "loss": 0.5056,
909
+ "step": 348
910
+ },
911
+ {
912
+ "epoch": 0.09980096673301109,
913
+ "grad_norm": 0.44019100069999695,
914
+ "learning_rate": 1.922325103666281e-06,
915
+ "loss": 0.3926,
916
+ "step": 351
917
+ },
918
+ {
919
+ "epoch": 0.10065396644867786,
920
+ "grad_norm": 0.5055895447731018,
921
+ "learning_rate": 1.696763360660808e-06,
922
+ "loss": 0.5037,
923
+ "step": 354
924
+ },
925
+ {
926
+ "epoch": 0.10150696616434461,
927
+ "grad_norm": 0.5478758215904236,
928
+ "learning_rate": 1.4848100516245717e-06,
929
+ "loss": 0.3948,
930
+ "step": 357
931
+ },
932
+ {
933
+ "epoch": 0.10235996588001137,
934
+ "grad_norm": 0.5242781639099121,
935
+ "learning_rate": 1.286588951321363e-06,
936
+ "loss": 0.4522,
937
+ "step": 360
938
+ },
939
+ {
940
+ "epoch": 0.10321296559567814,
941
+ "grad_norm": 0.5367030501365662,
942
+ "learning_rate": 1.102215815291774e-06,
943
+ "loss": 0.4246,
944
+ "step": 363
945
+ },
946
+ {
947
+ "epoch": 0.10406596531134489,
948
+ "grad_norm": 0.5049583315849304,
949
+ "learning_rate": 9.317983122552332e-07,
950
+ "loss": 0.3515,
951
+ "step": 366
952
+ },
953
+ {
954
+ "epoch": 0.10491896502701166,
955
+ "grad_norm": 0.47986966371536255,
956
+ "learning_rate": 7.754359612344859e-07,
957
+ "loss": 0.3667,
958
+ "step": 369
959
+ },
960
+ {
961
+ "epoch": 0.10577196474267841,
962
+ "grad_norm": 0.5948217511177063,
963
+ "learning_rate": 6.332200734393057e-07,
964
+ "loss": 0.4115,
965
+ "step": 372
966
+ },
967
+ {
968
+ "epoch": 0.1063406312197896,
969
+ "eval_loss": 0.41257256269454956,
970
+ "eval_runtime": 168.1203,
971
+ "eval_samples_per_second": 35.237,
972
+ "eval_steps_per_second": 4.408,
973
+ "step": 374
974
  }
975
  ],
976
  "logging_steps": 3,
 
990
  "attributes": {}
991
  }
992
  },
993
+ "total_flos": 1.197680065857454e+17,
994
  "train_batch_size": 8,
995
  "trial_name": null,
996
  "trial_params": null