Nexspear committed on
Commit
fbfd565
·
verified ·
1 Parent(s): abef7ad

Training in progress, step 378, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:90941e1693a558421377af5791061bc37e7dd6f4ed3b75ca4bda99a4e94b2488
3
  size 100966336
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:77f85c8bc4ad212684b7993062a23525ac4f791b5229784b4158ac4bce165586
3
  size 100966336
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:18e92072ad5cb96ea116208da591f4f9a7a38b1a697688a159b5227c548ac55d
3
  size 51613668
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2b37c1c4fa9a9e825ea05b1ca35dc1c4929d943b7bddd2f6086f57e6c5064ff4
3
  size 51613668
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1628639e06cf0da96dea52ad1bb1fc7d510f41ec927e99fe48623eb85119f343
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:875830942629bcea9e8c73e146b1df9f89fd36cd8b3e95d6764a6f4e78bd912d
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:73411ca8ec7e5959b18da4fe8d2de8084ec9d3eedda030beb9eadece4c950464
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:49f8e14b456d7fc01f5ad21526616d8f86a1c6c40024725332f505f34df4d95a
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.050335193438448,
5
  "eval_steps": 42,
6
- "global_step": 336,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -863,6 +863,112 @@
863
  "eval_samples_per_second": 35.923,
864
  "eval_steps_per_second": 4.492,
865
  "step": 336
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
866
  }
867
  ],
868
  "logging_steps": 3,
@@ -882,7 +988,7 @@
882
  "attributes": {}
883
  }
884
  },
885
- "total_flos": 1.0751071457850163e+17,
886
  "train_batch_size": 8,
887
  "trial_name": null,
888
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.056627092618254,
5
  "eval_steps": 42,
6
+ "global_step": 378,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
863
  "eval_samples_per_second": 35.923,
864
  "eval_steps_per_second": 4.492,
865
  "step": 336
866
+ },
867
+ {
868
+ "epoch": 0.05078461480843414,
869
+ "grad_norm": 0.18624208867549896,
870
+ "learning_rate": 1.217751806485235e-05,
871
+ "loss": 1.1368,
872
+ "step": 339
873
+ },
874
+ {
875
+ "epoch": 0.05123403617842028,
876
+ "grad_norm": 0.18036052584648132,
877
+ "learning_rate": 1.1767125051905315e-05,
878
+ "loss": 1.0682,
879
+ "step": 342
880
+ },
881
+ {
882
+ "epoch": 0.051683457548406427,
883
+ "grad_norm": 0.17344728112220764,
884
+ "learning_rate": 1.1361627469736285e-05,
885
+ "loss": 1.0299,
886
+ "step": 345
887
+ },
888
+ {
889
+ "epoch": 0.05213287891839257,
890
+ "grad_norm": 0.17225950956344604,
891
+ "learning_rate": 1.096117532999063e-05,
892
+ "loss": 1.0217,
893
+ "step": 348
894
+ },
895
+ {
896
+ "epoch": 0.052582300288378715,
897
+ "grad_norm": 0.1770411878824234,
898
+ "learning_rate": 1.0565916777779519e-05,
899
+ "loss": 1.1027,
900
+ "step": 351
901
+ },
902
+ {
903
+ "epoch": 0.05303172165836485,
904
+ "grad_norm": 0.15190331637859344,
905
+ "learning_rate": 1.0175998036874356e-05,
906
+ "loss": 1.0684,
907
+ "step": 354
908
+ },
909
+ {
910
+ "epoch": 0.053481143028351,
911
+ "grad_norm": 0.20575201511383057,
912
+ "learning_rate": 9.791563355612172e-06,
913
+ "loss": 1.0359,
914
+ "step": 357
915
+ },
916
+ {
917
+ "epoch": 0.05393056439833714,
918
+ "grad_norm": 0.2163867950439453,
919
+ "learning_rate": 9.412754953531663e-06,
920
+ "loss": 1.0943,
921
+ "step": 360
922
+ },
923
+ {
924
+ "epoch": 0.054379985768323286,
925
+ "grad_norm": 0.1523335576057434,
926
+ "learning_rate": 9.039712968759864e-06,
927
+ "loss": 1.1357,
928
+ "step": 363
929
+ },
930
+ {
931
+ "epoch": 0.05482940713830943,
932
+ "grad_norm": 0.1748288869857788,
933
+ "learning_rate": 8.672575406168782e-06,
934
+ "loss": 1.0745,
935
+ "step": 366
936
+ },
937
+ {
938
+ "epoch": 0.05527882850829557,
939
+ "grad_norm": 0.16783830523490906,
940
+ "learning_rate": 8.3114780863213e-06,
941
+ "loss": 1.0432,
942
+ "step": 369
943
+ },
944
+ {
945
+ "epoch": 0.05572824987828171,
946
+ "grad_norm": 0.17801252007484436,
947
+ "learning_rate": 7.956554595225016e-06,
948
+ "loss": 1.0649,
949
+ "step": 372
950
+ },
951
+ {
952
+ "epoch": 0.056177671248267856,
953
+ "grad_norm": 0.17944374680519104,
954
+ "learning_rate": 7.607936234912841e-06,
955
+ "loss": 1.0862,
956
+ "step": 375
957
+ },
958
+ {
959
+ "epoch": 0.056627092618254,
960
+ "grad_norm": 0.19096186757087708,
961
+ "learning_rate": 7.265751974868554e-06,
962
+ "loss": 1.0477,
963
+ "step": 378
964
+ },
965
+ {
966
+ "epoch": 0.056627092618254,
967
+ "eval_loss": 1.0896837711334229,
968
+ "eval_runtime": 312.799,
969
+ "eval_samples_per_second": 35.943,
970
+ "eval_steps_per_second": 4.495,
971
+ "step": 378
972
  }
973
  ],
974
  "logging_steps": 3,
 
988
  "attributes": {}
989
  }
990
  },
991
+ "total_flos": 1.2131417357234995e+17,
992
  "train_batch_size": 8,
993
  "trial_name": null,
994
  "trial_params": null