ardaspear committed
Commit 9307249 · verified · 1 Parent(s): 45569fd

Training in progress, step 396, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7765a191cebb35fd72f70a2a0196505273d2e6e498b1d7786301655c471ca6a0
+oid sha256:c62117bd0256894b3042a11cfda644c219e5759cc70ab280da51e51f572c6b73
 size 251748704
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6b9367ea9d35c74fdcbdc3cf63d1f05b6d62e63d248adefa508a15fab72532a4
+oid sha256:23f4c0deac9aef2fa8dd843486a943db0e1f1d76c494a3ef7b8fdf72cf1f88ae
 size 128585300
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9a607d3966f4e9b2eb446795209b0fa90a8b23911c1801843216cc122be3ba84
+oid sha256:940c32e385f23d49af3a4c46827c1518d27934b42fd2fe7cd075707a0ad18459
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:08efc2f6cb6c0d4b11182b283e8aca4d4d30744220189f7acb1a0db30a120f3f
+oid sha256:30280cabde0ed06f0e6140c1628262536698ba51c383111034150345e300b03c
 size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 2.526315789473684,
+  "epoch": 2.7789473684210524,
   "eval_steps": 36,
-  "global_step": 360,
+  "global_step": 396,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -935,6 +935,98 @@
       "eval_samples_per_second": 5.029,
       "eval_steps_per_second": 0.629,
       "step": 360
+    },
+    {
+      "epoch": 2.5473684210526315,
+      "grad_norm": 5.408112049102783,
+      "learning_rate": 2.9243502413064368e-06,
+      "loss": 0.6439,
+      "step": 363
+    },
+    {
+      "epoch": 2.568421052631579,
+      "grad_norm": 3.7381534576416016,
+      "learning_rate": 2.6654338838876665e-06,
+      "loss": 0.9288,
+      "step": 366
+    },
+    {
+      "epoch": 2.5894736842105264,
+      "grad_norm": 4.740654468536377,
+      "learning_rate": 2.4178715244404794e-06,
+      "loss": 0.9505,
+      "step": 369
+    },
+    {
+      "epoch": 2.610526315789474,
+      "grad_norm": 4.9893364906311035,
+      "learning_rate": 2.1817890137430934e-06,
+      "loss": 1.046,
+      "step": 372
+    },
+    {
+      "epoch": 2.6315789473684212,
+      "grad_norm": 4.344699382781982,
+      "learning_rate": 1.9573063666788875e-06,
+      "loss": 0.8301,
+      "step": 375
+    },
+    {
+      "epoch": 2.6526315789473687,
+      "grad_norm": 2.871662139892578,
+      "learning_rate": 1.7445377012256126e-06,
+      "loss": 0.6642,
+      "step": 378
+    },
+    {
+      "epoch": 2.6736842105263157,
+      "grad_norm": 3.569286346435547,
+      "learning_rate": 1.5435911804424357e-06,
+      "loss": 0.8558,
+      "step": 381
+    },
+    {
+      "epoch": 2.694736842105263,
+      "grad_norm": 4.009424209594727,
+      "learning_rate": 1.3545689574841342e-06,
+      "loss": 0.8686,
+      "step": 384
+    },
+    {
+      "epoch": 2.7157894736842105,
+      "grad_norm": 3.5932652950286865,
+      "learning_rate": 1.1775671236705365e-06,
+      "loss": 1.0848,
+      "step": 387
+    },
+    {
+      "epoch": 2.736842105263158,
+      "grad_norm": 4.354364395141602,
+      "learning_rate": 1.0126756596375686e-06,
+      "loss": 1.1122,
+      "step": 390
+    },
+    {
+      "epoch": 2.7578947368421054,
+      "grad_norm": 3.184096336364746,
+      "learning_rate": 8.599783895946761e-07,
+      "loss": 0.8129,
+      "step": 393
+    },
+    {
+      "epoch": 2.7789473684210524,
+      "grad_norm": 4.265777587890625,
+      "learning_rate": 7.195529387119815e-07,
+      "loss": 0.7224,
+      "step": 396
+    },
+    {
+      "epoch": 2.7789473684210524,
+      "eval_loss": 0.3112446963787079,
+      "eval_runtime": 47.7893,
+      "eval_samples_per_second": 5.022,
+      "eval_steps_per_second": 0.628,
+      "step": 396
     }
   ],
   "logging_steps": 3,
@@ -954,7 +1046,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.4754141210201293e+18,
+  "total_flos": 1.6247899734147072e+18,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null