leixa committed
Commit 4ad43cb
1 Parent(s): 8158afb

Training in progress, step 341, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:80811b878fbdd5efdd4f08e7b0bd3a4c9633c207c497739ecac63cd5a9eca669
+oid sha256:10462e7bb907e56f44911b66dc9d08e780a22980744541436cfa57fd33089b28
 size 639691872
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e73ae7c4dc344be3442d36391cb4ba52cfd5d97e4bb25bc22948ccdfd1f87bdf
+oid sha256:1288067af2f92d91e698359b4e37f06863ae4944657fba1418f6b5221da7ac34
 size 325340244
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:876d99865a3847c750dc39ca4a3b8622be8f667a3f24db9221e48742faf0c0fe
+oid sha256:0fad81ea991ac687f6089a33e4df7b4989f6dc0a113bcc48d47ff9341825e8c3
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:68293b71e7e6e68b301413d0698f3727535763528fb8f34f0fe9f273150e3e61
+oid sha256:6a23e869a07ea343caba66ddef1ef2a01435b58bcc15218ec50d03fed44b9143
 size 1064
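
Each of the four files above is stored through Git LFS, so the repository tracks only a three-line pointer (version, `oid sha256:…`, `size` in bytes); this commit replaces each old pointer with one referencing the new checkpoint blob. As a minimal illustration only (not part of this repository), the sketch below parses such a pointer and verifies a locally fetched blob against it; both file paths are hypothetical placeholders.

```python
import hashlib
from pathlib import Path


def parse_lfs_pointer(text: str) -> dict:
    """Parse a Git LFS pointer file (version / oid sha256:<hex> / size <bytes>)."""
    fields = dict(line.split(" ", 1) for line in text.strip().splitlines())
    algo, digest = fields["oid"].split(":", 1)
    return {"version": fields["version"], "algo": algo, "oid": digest, "size": int(fields["size"])}


def verify_blob(pointer_path: str, blob_path: str) -> bool:
    """Check that a local file matches the oid and size recorded in its LFS pointer."""
    ptr = parse_lfs_pointer(Path(pointer_path).read_text())
    h = hashlib.sha256()
    size = 0
    with open(blob_path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):  # hash in 1 MiB chunks
            h.update(chunk)
            size += len(chunk)
    return size == ptr["size"] and h.hexdigest() == ptr["oid"]


# Hypothetical paths: the pointer as stored in git vs. the blob pulled by `git lfs pull`.
print(verify_blob("pointers/scheduler.pt", "downloads/scheduler.pt"))
```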
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 2.5152129817444218,
+  "epoch": 2.766734279918864,
   "eval_steps": 31,
-  "global_step": 310,
+  "global_step": 341,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -816,6 +816,84 @@
       "eval_samples_per_second": 15.481,
       "eval_steps_per_second": 1.935,
       "step": 310
+    },
+    {
+      "epoch": 2.5314401622718052,
+      "grad_norm": 4.959964752197266,
+      "learning_rate": 6.269014643030213e-06,
+      "loss": 0.9463,
+      "step": 312
+    },
+    {
+      "epoch": 2.5557809330628802,
+      "grad_norm": 5.439755916595459,
+      "learning_rate": 5.649458341088915e-06,
+      "loss": 0.8273,
+      "step": 315
+    },
+    {
+      "epoch": 2.5801217038539552,
+      "grad_norm": 5.629069805145264,
+      "learning_rate": 5.060297685041659e-06,
+      "loss": 0.9228,
+      "step": 318
+    },
+    {
+      "epoch": 2.6044624746450307,
+      "grad_norm": 6.254184246063232,
+      "learning_rate": 4.501936456172845e-06,
+      "loss": 0.8723,
+      "step": 321
+    },
+    {
+      "epoch": 2.6288032454361057,
+      "grad_norm": 5.284829616546631,
+      "learning_rate": 3.974757327377981e-06,
+      "loss": 0.7642,
+      "step": 324
+    },
+    {
+      "epoch": 2.6531440162271807,
+      "grad_norm": 5.344634056091309,
+      "learning_rate": 3.479121600898777e-06,
+      "loss": 0.849,
+      "step": 327
+    },
+    {
+      "epoch": 2.6774847870182557,
+      "grad_norm": 5.416316509246826,
+      "learning_rate": 3.0153689607045845e-06,
+      "loss": 0.9429,
+      "step": 330
+    },
+    {
+      "epoch": 2.7018255578093306,
+      "grad_norm": 6.064454555511475,
+      "learning_rate": 2.583817239690034e-06,
+      "loss": 0.8065,
+      "step": 333
+    },
+    {
+      "epoch": 2.7261663286004056,
+      "grad_norm": 4.958108425140381,
+      "learning_rate": 2.1847622018482283e-06,
+      "loss": 0.8804,
+      "step": 336
+    },
+    {
+      "epoch": 2.7505070993914806,
+      "grad_norm": 5.65994119644165,
+      "learning_rate": 1.8184773395688526e-06,
+      "loss": 0.9978,
+      "step": 339
+    },
+    {
+      "epoch": 2.766734279918864,
+      "eval_loss": 0.5425376296043396,
+      "eval_runtime": 13.4517,
+      "eval_samples_per_second": 15.463,
+      "eval_steps_per_second": 1.933,
+      "step": 341
     }
   ],
   "logging_steps": 3,
@@ -835,7 +913,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 4.118715981197476e+17,
+  "total_flos": 4.531169671218463e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null