0x1202 committed on
Commit
729bf9b
1 Parent(s): cda4af5

Training in progress, step 110, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:43c3946a5a89d36dfa1fc36228b54d7c970649172043020b6e528b995b6b00fe
3
  size 45118424
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:816a4ec8d4f9ef73c31dbbf25e73cf8ae6af0fc27b317aa556ff26eaf6178d1e
3
  size 45118424
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b5301d5597f3da64893b3d9e2e609104f4243ffff9c9302a9bdd9c47057329e6
3
  size 90365754
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:13306704d6539b57927415351d65adc41718ea580cb04f72f858b1d721b8d475
3
  size 90365754
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:819e8f8d7c618e04879ebe16dcdcf92dc0610755b13a10ec192932587c41e3d1
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3c5ce7adad1aada30f43bdfff0900a492d19806b846335afffebce67d207df97
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:df88ad9d29a5b994fc668c3ab662b1d4e6baa321c3f5068caf8ff1c21c6e351d
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eda1b3688163acb4c0de1a0c7c611576b5a46451ca11dac78f3f571adee24be0
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 0.004337400663644075,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-100",
4
- "epoch": 1.8310502283105023,
5
  "eval_steps": 25,
6
- "global_step": 100,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -747,6 +747,76 @@
747
  "eval_samples_per_second": 14.334,
748
  "eval_steps_per_second": 2.007,
749
  "step": 100
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
750
  }
751
  ],
752
  "logging_steps": 1,
@@ -770,12 +840,12 @@
770
  "should_evaluate": false,
771
  "should_log": false,
772
  "should_save": true,
773
- "should_training_stop": false
774
  },
775
  "attributes": {}
776
  }
777
  },
778
- "total_flos": 2.637346415365325e+16,
779
  "train_batch_size": 8,
780
  "trial_name": null,
781
  "trial_params": null
 
1
  {
2
  "best_metric": 0.004337400663644075,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-100",
4
+ "epoch": 2.018264840182648,
5
  "eval_steps": 25,
6
+ "global_step": 110,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
747
  "eval_samples_per_second": 14.334,
748
  "eval_steps_per_second": 2.007,
749
  "step": 100
750
+ },
751
+ {
752
+ "epoch": 1.8493150684931505,
753
+ "grad_norm": 0.026043424382805824,
754
+ "learning_rate": 5.95594714845854e-06,
755
+ "loss": 0.0015,
756
+ "step": 101
757
+ },
758
+ {
759
+ "epoch": 1.8675799086757991,
760
+ "grad_norm": 0.05837749317288399,
761
+ "learning_rate": 4.712525830705338e-06,
762
+ "loss": 0.0033,
763
+ "step": 102
764
+ },
765
+ {
766
+ "epoch": 1.8858447488584473,
767
+ "grad_norm": 1.0614242553710938,
768
+ "learning_rate": 3.6124857091878845e-06,
769
+ "loss": 0.0117,
770
+ "step": 103
771
+ },
772
+ {
773
+ "epoch": 1.904109589041096,
774
+ "grad_norm": 0.020967742428183556,
775
+ "learning_rate": 2.656912390696708e-06,
776
+ "loss": 0.001,
777
+ "step": 104
778
+ },
779
+ {
780
+ "epoch": 1.9223744292237441,
781
+ "grad_norm": 0.08478910475969315,
782
+ "learning_rate": 1.8467489107293509e-06,
783
+ "loss": 0.0009,
784
+ "step": 105
785
+ },
786
+ {
787
+ "epoch": 1.9406392694063928,
788
+ "grad_norm": 0.028337517753243446,
789
+ "learning_rate": 1.1827948028283352e-06,
790
+ "loss": 0.0007,
791
+ "step": 106
792
+ },
793
+ {
794
+ "epoch": 1.958904109589041,
795
+ "grad_norm": 1.1033185720443726,
796
+ "learning_rate": 6.657053095380005e-07,
797
+ "loss": 0.0129,
798
+ "step": 107
799
+ },
800
+ {
801
+ "epoch": 1.9771689497716896,
802
+ "grad_norm": 0.06699586659669876,
803
+ "learning_rate": 2.959907357592661e-07,
804
+ "loss": 0.0026,
805
+ "step": 108
806
+ },
807
+ {
808
+ "epoch": 1.9954337899543377,
809
+ "grad_norm": 0.02767285704612732,
810
+ "learning_rate": 7.401594514025999e-08,
811
+ "loss": 0.0019,
812
+ "step": 109
813
+ },
814
+ {
815
+ "epoch": 2.018264840182648,
816
+ "grad_norm": 0.1497020125389099,
817
+ "learning_rate": 0.0,
818
+ "loss": 0.0052,
819
+ "step": 110
820
  }
821
  ],
822
  "logging_steps": 1,
 
840
  "should_evaluate": false,
841
  "should_log": false,
842
  "should_save": true,
843
+ "should_training_stop": true
844
  },
845
  "attributes": {}
846
  }
847
  },
848
+ "total_flos": 2.878974644453376e+16,
849
  "train_batch_size": 8,
850
  "trial_name": null,
851
  "trial_params": null