penglingwei commited on
Commit
5e0bfce
1 Parent(s): d044954

Training in progress, step 1300, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6f969a0948bed2c651ee0376e2b9530983d42d8fddce6465c47996fa8342abda
3
  size 355970836
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0bf7eede2c0f441c39c73721f8eea4ac4a9ec03d59061124d3dd5609631233af
3
  size 355970836
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ca285aebd9342dc71d9a519550a808d80031aa6d14950f0a951d219847fda494
3
  size 712036922
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8447e053424c0e0cc6a81dcab5f337ff796f160b012afda0fab232e4585dcc6b
3
  size 712036922
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:57a98fbdae072481cd4a04e4dba40aca5dd72de7f8f116f374b919687e01043f
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:14826f87a6d1f4597dc64c05441ab4b32976806d7c9394950c23908c00ccd88c
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:de12a84c16ca7d4d35cb56d57baea7004d86cd548bee0fcca8eb591e1f605582
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:81cbdfee13cde875628c2c67c528d4aa034fcb58929907fe962e81ec278cbe24
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.024414202199185556,
5
  "eval_steps": 500,
6
- "global_step": 1200,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -847,6 +847,76 @@
847
  "learning_rate": 3.1234716720210725e-06,
848
  "loss": 192.2185,
849
  "step": 1200
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
850
  }
851
  ],
852
  "logging_steps": 10,
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.026448719049117688,
5
  "eval_steps": 500,
6
+ "global_step": 1300,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
847
  "learning_rate": 3.1234716720210725e-06,
848
  "loss": 192.2185,
849
  "step": 1200
850
+ },
851
+ {
852
+ "epoch": 0.02461765388417877,
853
+ "grad_norm": 1015843.75,
854
+ "learning_rate": 3.123426731137647e-06,
855
+ "loss": 209.0567,
856
+ "step": 1210
857
+ },
858
+ {
859
+ "epoch": 0.024821105569171983,
860
+ "grad_norm": 274952.09375,
861
+ "learning_rate": 3.123381139377551e-06,
862
+ "loss": 203.7306,
863
+ "step": 1220
864
+ },
865
+ {
866
+ "epoch": 0.025024557254165195,
867
+ "grad_norm": 1192346.875,
868
+ "learning_rate": 3.1233348967597942e-06,
869
+ "loss": 196.9537,
870
+ "step": 1230
871
+ },
872
+ {
873
+ "epoch": 0.02522800893915841,
874
+ "grad_norm": 300915.9375,
875
+ "learning_rate": 3.123288003303661e-06,
876
+ "loss": 194.9343,
877
+ "step": 1240
878
+ },
879
+ {
880
+ "epoch": 0.025431460624151622,
881
+ "grad_norm": 371443.53125,
882
+ "learning_rate": 3.123240459028703e-06,
883
+ "loss": 198.7411,
884
+ "step": 1250
885
+ },
886
+ {
887
+ "epoch": 0.025634912309144834,
888
+ "grad_norm": 224521.265625,
889
+ "learning_rate": 3.1231922639547463e-06,
890
+ "loss": 201.7991,
891
+ "step": 1260
892
+ },
893
+ {
894
+ "epoch": 0.02583836399413805,
895
+ "grad_norm": 1233164.625,
896
+ "learning_rate": 3.1231434181018874e-06,
897
+ "loss": 222.2058,
898
+ "step": 1270
899
+ },
900
+ {
901
+ "epoch": 0.02604181567913126,
902
+ "grad_norm": 190394.28125,
903
+ "learning_rate": 3.123093921490494e-06,
904
+ "loss": 211.7859,
905
+ "step": 1280
906
+ },
907
+ {
908
+ "epoch": 0.026245267364124473,
909
+ "grad_norm": 369118.59375,
910
+ "learning_rate": 3.1230437741412052e-06,
911
+ "loss": 181.8996,
912
+ "step": 1290
913
+ },
914
+ {
915
+ "epoch": 0.026448719049117688,
916
+ "grad_norm": 1282949.5,
917
+ "learning_rate": 3.122992976074931e-06,
918
+ "loss": 200.6364,
919
+ "step": 1300
920
  }
921
  ],
922
  "logging_steps": 10,