leixa committed on
Commit
7b62fb9
·
verified ·
1 Parent(s): f52ec61

Training in progress, step 378, checkpoint

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c845921dd5d38c1bc9a1cff2de771649f69d3bc50aeb640737c71d999adb45d0
3
  size 341314644
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aaac6881df4f9c17d91449206e0a2a594f42c6e20b4c268fe7c634c24ea1a23b
3
  size 341314644
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9e6155b2f406e14ae258f21b07526ec14202a94f19efd93172b191f2e0672456
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:019d764b8c1173a11348d363d9c80b9a732cc706db87376a2e2f30a82e520afa
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:10affc9ced28dcfaf0d40e3497a97c8e7416bd057324538f99a7e1756fd84408
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6ac207b57c6cefba3838e335ba7ebf320ffdaee8162f1c0afc72ea9ad9f0725f
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.017857854665763145,
5
  "eval_steps": 42,
6
- "global_step": 336,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -863,6 +863,112 @@
863
  "eval_samples_per_second": 13.378,
864
  "eval_steps_per_second": 1.673,
865
  "step": 336
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
866
  }
867
  ],
868
  "logging_steps": 3,
@@ -882,7 +988,7 @@
882
  "attributes": {}
883
  }
884
  },
885
- "total_flos": 5.13648873562964e+17,
886
  "train_batch_size": 8,
887
  "trial_name": null,
888
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.02009008649898354,
5
  "eval_steps": 42,
6
+ "global_step": 378,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
863
  "eval_samples_per_second": 13.378,
864
  "eval_steps_per_second": 1.673,
865
  "step": 336
866
+ },
867
+ {
868
+ "epoch": 0.018017299796707457,
869
+ "grad_norm": NaN,
870
+ "learning_rate": 2.43550361297047e-05,
871
+ "loss": 0.0,
872
+ "step": 339
873
+ },
874
+ {
875
+ "epoch": 0.018176744927651773,
876
+ "grad_norm": NaN,
877
+ "learning_rate": 2.353425010381063e-05,
878
+ "loss": 0.0,
879
+ "step": 342
880
+ },
881
+ {
882
+ "epoch": 0.018336190058596085,
883
+ "grad_norm": NaN,
884
+ "learning_rate": 2.272325493947257e-05,
885
+ "loss": 0.0,
886
+ "step": 345
887
+ },
888
+ {
889
+ "epoch": 0.0184956351895404,
890
+ "grad_norm": NaN,
891
+ "learning_rate": 2.192235065998126e-05,
892
+ "loss": 0.0,
893
+ "step": 348
894
+ },
895
+ {
896
+ "epoch": 0.018655080320484713,
897
+ "grad_norm": NaN,
898
+ "learning_rate": 2.1131833555559037e-05,
899
+ "loss": 0.0,
900
+ "step": 351
901
+ },
902
+ {
903
+ "epoch": 0.018814525451429028,
904
+ "grad_norm": NaN,
905
+ "learning_rate": 2.0351996073748713e-05,
906
+ "loss": 0.0,
907
+ "step": 354
908
+ },
909
+ {
910
+ "epoch": 0.01897397058237334,
911
+ "grad_norm": NaN,
912
+ "learning_rate": 1.9583126711224343e-05,
913
+ "loss": 0.0,
914
+ "step": 357
915
+ },
916
+ {
917
+ "epoch": 0.019133415713317656,
918
+ "grad_norm": NaN,
919
+ "learning_rate": 1.8825509907063327e-05,
920
+ "loss": 0.0,
921
+ "step": 360
922
+ },
923
+ {
924
+ "epoch": 0.019292860844261968,
925
+ "grad_norm": NaN,
926
+ "learning_rate": 1.807942593751973e-05,
927
+ "loss": 0.0,
928
+ "step": 363
929
+ },
930
+ {
931
+ "epoch": 0.019452305975206283,
932
+ "grad_norm": NaN,
933
+ "learning_rate": 1.7345150812337564e-05,
934
+ "loss": 0.0,
935
+ "step": 366
936
+ },
937
+ {
938
+ "epoch": 0.019611751106150595,
939
+ "grad_norm": NaN,
940
+ "learning_rate": 1.66229561726426e-05,
941
+ "loss": 0.0,
942
+ "step": 369
943
+ },
944
+ {
945
+ "epoch": 0.01977119623709491,
946
+ "grad_norm": NaN,
947
+ "learning_rate": 1.5913109190450032e-05,
948
+ "loss": 0.0,
949
+ "step": 372
950
+ },
951
+ {
952
+ "epoch": 0.019930641368039223,
953
+ "grad_norm": NaN,
954
+ "learning_rate": 1.5215872469825682e-05,
955
+ "loss": 0.0,
956
+ "step": 375
957
+ },
958
+ {
959
+ "epoch": 0.02009008649898354,
960
+ "grad_norm": NaN,
961
+ "learning_rate": 1.4531503949737108e-05,
962
+ "loss": 0.0,
963
+ "step": 378
964
+ },
965
+ {
966
+ "epoch": 0.02009008649898354,
967
+ "eval_loss": NaN,
968
+ "eval_runtime": 2368.4689,
969
+ "eval_samples_per_second": 13.38,
970
+ "eval_steps_per_second": 1.673,
971
+ "step": 378
972
  }
973
  ],
974
  "logging_steps": 3,
 
988
  "attributes": {}
989
  }
990
  },
991
+ "total_flos": 5.785149574490358e+17,
992
  "train_batch_size": 8,
993
  "trial_name": null,
994
  "trial_params": null