Whispful committed on
Commit
657aedb
·
verified ·
1 Parent(s): e9e4b23

Training in progress, step 114, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:47159059fb382dcd4d40fbb428d89bf3dd755e80340f619f0134f67f4c1227b4
3
  size 335604696
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:91e64200320a6a195854d24d4094d672d6b43bc2da942851476a2d5505324513
3
  size 335604696
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:76825cc7a0716b7bd22f7ea323d9db392183f43f53ffa6283fe6c854b826950a
3
  size 671466706
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a0449816ff3dc21bd97bee9e273b3bd2ddfb545179435edc1e4779826c4b8822
3
  size 671466706
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d66610bbf3849e4d33837f0f26c218ea8f8d5a6d312408d13439f311da97f100
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ec41f20afdc2c3e9b0efc876c1cd4be9b59db158d8a108580028a02a218ca8ff
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ecb0c43f77f7e2c8d7164b366fb11f55cae2daf08f4dea1a817d11f101787129
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d30f9e384a1bde4cb88eb41e7604c37401b8069c041cf6ca3c08abcac7465a4f
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 0.527793288230896,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-100",
4
- "epoch": 0.05224063341768019,
5
  "eval_steps": 25,
6
- "global_step": 100,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -747,6 +747,104 @@
747
  "eval_samples_per_second": 2.783,
748
  "eval_steps_per_second": 2.783,
749
  "step": 100
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
750
  }
751
  ],
752
  "logging_steps": 1,
@@ -770,12 +868,12 @@
770
  "should_evaluate": false,
771
  "should_log": false,
772
  "should_save": true,
773
- "should_training_stop": false
774
  },
775
  "attributes": {}
776
  }
777
  },
778
- "total_flos": 2.829010669142016e+17,
779
  "train_batch_size": 1,
780
  "trial_name": null,
781
  "trial_params": null
 
1
  {
2
  "best_metric": 0.527793288230896,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-100",
4
+ "epoch": 0.05955432209615542,
5
  "eval_steps": 25,
6
+ "global_step": 114,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
747
  "eval_samples_per_second": 2.783,
748
  "eval_steps_per_second": 2.783,
749
  "step": 100
750
+ },
751
+ {
752
+ "epoch": 0.05276303975185699,
753
+ "grad_norm": 80.93624114990234,
754
+ "learning_rate": 2.61322373709693e-05,
755
+ "loss": 14.8852,
756
+ "step": 101
757
+ },
758
+ {
759
+ "epoch": 0.05328544608603379,
760
+ "grad_norm": 93.81378936767578,
761
+ "learning_rate": 2.523400727244279e-05,
762
+ "loss": 19.7007,
763
+ "step": 102
764
+ },
765
+ {
766
+ "epoch": 0.05380785242021059,
767
+ "grad_norm": 76.47543334960938,
768
+ "learning_rate": 2.4404913533436186e-05,
769
+ "loss": 16.0862,
770
+ "step": 103
771
+ },
772
+ {
773
+ "epoch": 0.0543302587543874,
774
+ "grad_norm": 83.75788116455078,
775
+ "learning_rate": 2.3645632374695246e-05,
776
+ "loss": 18.1107,
777
+ "step": 104
778
+ },
779
+ {
780
+ "epoch": 0.0548526650885642,
781
+ "grad_norm": 97.02025604248047,
782
+ "learning_rate": 2.295678307682387e-05,
783
+ "loss": 19.144,
784
+ "step": 105
785
+ },
786
+ {
787
+ "epoch": 0.055375071422741,
788
+ "grad_norm": 82.75303649902344,
789
+ "learning_rate": 2.233892747518999e-05,
790
+ "loss": 19.5145,
791
+ "step": 106
792
+ },
793
+ {
794
+ "epoch": 0.0558974777569178,
795
+ "grad_norm": 67.65582275390625,
796
+ "learning_rate": 2.179256950168459e-05,
797
+ "loss": 15.1443,
798
+ "step": 107
799
+ },
800
+ {
801
+ "epoch": 0.05641988409109461,
802
+ "grad_norm": 119.88687133789062,
803
+ "learning_rate": 2.1318154773707632e-05,
804
+ "loss": 18.0138,
805
+ "step": 108
806
+ },
807
+ {
808
+ "epoch": 0.05694229042527141,
809
+ "grad_norm": 78.50994110107422,
810
+ "learning_rate": 2.0916070230716063e-05,
811
+ "loss": 15.5937,
812
+ "step": 109
813
+ },
814
+ {
815
+ "epoch": 0.05746469675944821,
816
+ "grad_norm": 81.8220443725586,
817
+ "learning_rate": 2.0586643818630484e-05,
818
+ "loss": 16.9296,
819
+ "step": 110
820
+ },
821
+ {
822
+ "epoch": 0.05798710309362501,
823
+ "grad_norm": 84.6229248046875,
824
+ "learning_rate": 2.0330144222357784e-05,
825
+ "loss": 16.015,
826
+ "step": 111
827
+ },
828
+ {
829
+ "epoch": 0.05850950942780181,
830
+ "grad_norm": 59.201263427734375,
831
+ "learning_rate": 2.0146780646647795e-05,
832
+ "loss": 12.9246,
833
+ "step": 112
834
+ },
835
+ {
836
+ "epoch": 0.059031915761978616,
837
+ "grad_norm": 107.85786437988281,
838
+ "learning_rate": 2.003670264546297e-05,
839
+ "loss": 15.2457,
840
+ "step": 113
841
+ },
842
+ {
843
+ "epoch": 0.05955432209615542,
844
+ "grad_norm": 68.8541030883789,
845
+ "learning_rate": 2e-05,
846
+ "loss": 12.7823,
847
+ "step": 114
848
  }
849
  ],
850
  "logging_steps": 1,
 
868
  "should_evaluate": false,
869
  "should_log": false,
870
  "should_save": true,
871
+ "should_training_stop": true
872
  },
873
  "attributes": {}
874
  }
875
  },
876
+ "total_flos": 3.225072162821898e+17,
877
  "train_batch_size": 1,
878
  "trial_name": null,
879
  "trial_params": null