lesso08 commited on
Commit
72e72cd
·
verified ·
1 Parent(s): 9494f42

Training in progress, step 118, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a23b7052f6442b5e57e061a9ec12c4555b1a66baaea0bf2e069f4de2dcc01771
3
  size 78207176
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:63cfee5878ab65db5b705ec6ea806281ca700988be1b3e3e74c211ea2647b1db
3
  size 78207176
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ade28142ea9fd87d7fa415e1c947dd73351736915da0c3a90896180c9cab1afb
3
  size 40177764
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9ad4e2fd44e8e8e8a558ec913b02829bdfa4e69a7c97eb3a8f9f3aabfad67de0
3
  size 40177764
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c44b3e4b73abe7a23907fe168d7be6e78eb1a98f65b325b6165b1cf3630ff28c
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:43e6292cda0efeb1e6132b0ec0fcb77f2d4b70b28c60fedc3fc6b80f99b24306
3
  size 15984
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b7bd5b05b92519d2726ffee16e42808f69da54027b63672bcf6975fb353217e4
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0b3bcdc466eb73eebfd9675632f258530c2eeec6e827633f8970adfddd0de13c
3
  size 15984
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e25f604c47c60866866955f607de55bfd3c9d658f41f3b624581a08f7ae974bd
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3774654a80525f2e7d0c05903fae9492ccc0d6b1e76f91d989b07516a151da74
3
  size 15984
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2768b22c9df76071dac580ad1afc0b8998c425a269dfebe53b087908a30a23ec
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3c289fbc2df42c6cb2efdede3def967473268adb1bf30caae90bc8595eaee2fe
3
  size 15984
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:53c92cf1bacb12b4d4201ed0d262a3916ea3cb93177659a32fa587afa665a707
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:29cb203e73a43b847447a70852c414e0fe34eee7702678e59166287df8b6db17
3
  size 15984
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:177e7c915929fd7e70aa37d949f3a428f0cb3365b8fcc1d2447a644a0521a136
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:54f91fff101b03e8a32a3a388ec5c325dc89247d0110173a8d3b338bbdac0636
3
  size 15984
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7b6e6d9fae51bf6acdae853662ca2a80b8f4008cb3fe3616c8fed9e7feeb766b
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:008f47c4841f931098e95b821c0d31865adfff64150cb817ae3d0b7a9f8422c0
3
  size 15984
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:60e032504bc328692f846fde4443e0fee942c216f6f265dfe78ed063fc7efd5e
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a74062505e8d2d852ceb1243ee3149c6fcb8e05e0cc708541e00acc50125aa0b
3
  size 15984
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1565be9b29d4ad530db38223d84b3671e6b3a1aabac8097e44e5c246994c335a
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9b03eaaf9606b705beb16eda7d7500cdc94d6c1e550ef8fa1f732c8309e781a6
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 0.7587078809738159,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-100",
4
- "epoch": 2.5316455696202533,
5
  "eval_steps": 50,
6
- "global_step": 100,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -731,6 +731,132 @@
731
  "eval_samples_per_second": 474.105,
732
  "eval_steps_per_second": 15.207,
733
  "step": 100
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
734
  }
735
  ],
736
  "logging_steps": 1,
@@ -754,12 +880,12 @@
754
  "should_evaluate": false,
755
  "should_log": false,
756
  "should_save": true,
757
- "should_training_stop": false
758
  },
759
  "attributes": {}
760
  }
761
  },
762
- "total_flos": 2.00864304201728e+16,
763
  "train_batch_size": 8,
764
  "trial_name": null,
765
  "trial_params": null
 
1
  {
2
  "best_metric": 0.7587078809738159,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-100",
4
+ "epoch": 2.9873417721518987,
5
  "eval_steps": 50,
6
+ "global_step": 118,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
731
  "eval_samples_per_second": 474.105,
732
  "eval_steps_per_second": 15.207,
733
  "step": 100
734
+ },
735
+ {
736
+ "epoch": 2.5569620253164556,
737
+ "grad_norm": 0.0875733345746994,
738
+ "learning_rate": 6.469124876274007e-06,
739
+ "loss": 0.7216,
740
+ "step": 101
741
+ },
742
+ {
743
+ "epoch": 2.5822784810126582,
744
+ "grad_norm": 0.12728005647659302,
745
+ "learning_rate": 5.743837422535743e-06,
746
+ "loss": 0.7692,
747
+ "step": 102
748
+ },
749
+ {
750
+ "epoch": 2.607594936708861,
751
+ "grad_norm": 0.15928637981414795,
752
+ "learning_rate": 5.059379500020903e-06,
753
+ "loss": 0.7668,
754
+ "step": 103
755
+ },
756
+ {
757
+ "epoch": 2.632911392405063,
758
+ "grad_norm": 0.40893128514289856,
759
+ "learning_rate": 4.41633022846521e-06,
760
+ "loss": 0.7803,
761
+ "step": 104
762
+ },
763
+ {
764
+ "epoch": 2.6582278481012658,
765
+ "grad_norm": 0.2984897792339325,
766
+ "learning_rate": 3.815233691751582e-06,
767
+ "loss": 0.7272,
768
+ "step": 105
769
+ },
770
+ {
771
+ "epoch": 2.6835443037974684,
772
+ "grad_norm": 0.19053974747657776,
773
+ "learning_rate": 3.2565984775609507e-06,
774
+ "loss": 0.727,
775
+ "step": 106
776
+ },
777
+ {
778
+ "epoch": 2.708860759493671,
779
+ "grad_norm": 0.13291624188423157,
780
+ "learning_rate": 2.7408972470561695e-06,
781
+ "loss": 0.7532,
782
+ "step": 107
783
+ },
784
+ {
785
+ "epoch": 2.7341772151898733,
786
+ "grad_norm": 0.12510888278484344,
787
+ "learning_rate": 2.2685663349635993e-06,
788
+ "loss": 0.745,
789
+ "step": 108
790
+ },
791
+ {
792
+ "epoch": 2.759493670886076,
793
+ "grad_norm": 0.13656283915042877,
794
+ "learning_rate": 1.840005380390317e-06,
795
+ "loss": 0.7622,
796
+ "step": 109
797
+ },
798
+ {
799
+ "epoch": 2.7848101265822782,
800
+ "grad_norm": 0.2949647009372711,
801
+ "learning_rate": 1.4555769886895145e-06,
802
+ "loss": 0.7986,
803
+ "step": 110
804
+ },
805
+ {
806
+ "epoch": 2.810126582278481,
807
+ "grad_norm": 0.33257901668548584,
808
+ "learning_rate": 1.1156064246602202e-06,
809
+ "loss": 0.7223,
810
+ "step": 111
811
+ },
812
+ {
813
+ "epoch": 2.8354430379746836,
814
+ "grad_norm": 0.2141508013010025,
815
+ "learning_rate": 8.203813373407668e-07,
816
+ "loss": 0.725,
817
+ "step": 112
818
+ },
819
+ {
820
+ "epoch": 2.8607594936708862,
821
+ "grad_norm": 0.133058562874794,
822
+ "learning_rate": 5.701515166289986e-07,
823
+ "loss": 0.7342,
824
+ "step": 113
825
+ },
826
+ {
827
+ "epoch": 2.8860759493670884,
828
+ "grad_norm": 0.06634331494569778,
829
+ "learning_rate": 3.6512868193507675e-07,
830
+ "loss": 0.74,
831
+ "step": 114
832
+ },
833
+ {
834
+ "epoch": 2.911392405063291,
835
+ "grad_norm": 0.17949539422988892,
836
+ "learning_rate": 2.0548630304574055e-07,
837
+ "loss": 0.7621,
838
+ "step": 115
839
+ },
840
+ {
841
+ "epoch": 2.9367088607594938,
842
+ "grad_norm": 0.2013615071773529,
843
+ "learning_rate": 9.135945335151851e-08,
844
+ "loss": 0.7554,
845
+ "step": 116
846
+ },
847
+ {
848
+ "epoch": 2.962025316455696,
849
+ "grad_norm": 0.2054380476474762,
850
+ "learning_rate": 2.2844695561167638e-08,
851
+ "loss": 0.7493,
852
+ "step": 117
853
+ },
854
+ {
855
+ "epoch": 2.9873417721518987,
856
+ "grad_norm": 0.1201392188668251,
857
+ "learning_rate": 0.0,
858
+ "loss": 0.7322,
859
+ "step": 118
860
  }
861
  ],
862
  "logging_steps": 1,
 
880
  "should_evaluate": false,
881
  "should_log": false,
882
  "should_save": true,
883
+ "should_training_stop": true
884
  },
885
  "attributes": {}
886
  }
887
  },
888
+ "total_flos": 2.368962701348045e+16,
889
  "train_batch_size": 8,
890
  "trial_name": null,
891
  "trial_params": null