ardaspear committed on
Commit
289d53b
·
verified ·
1 Parent(s): e899ad3

Training in progress, step 428, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c62117bd0256894b3042a11cfda644c219e5759cc70ab280da51e51f572c6b73
3
  size 251748704
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1fadece87f7d0b5bf7ec45f9ace6d987bc11dc65facb1ca4869de2b1ee32d3b0
3
  size 251748704
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:23f4c0deac9aef2fa8dd843486a943db0e1f1d76c494a3ef7b8fdf72cf1f88ae
3
  size 128585300
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d1603142e24f463a84d869b06e66215c2cc49f7dd7f149e46aabd1520bb6f37e
3
  size 128585300
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:940c32e385f23d49af3a4c46827c1518d27934b42fd2fe7cd075707a0ad18459
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8eddf67924b73cf743cd854b86e19ef82315aea0a3cd5a35508eada9db1b616d
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:30280cabde0ed06f0e6140c1628262536698ba51c383111034150345e300b03c
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:db80f425fe1be43ea09e81fd5c5aa29383b214af5093b2dc266b56c382e52827
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 2.7789473684210524,
5
  "eval_steps": 36,
6
- "global_step": 396,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1027,6 +1027,76 @@
1027
  "eval_samples_per_second": 5.022,
1028
  "eval_steps_per_second": 0.628,
1029
  "step": 396
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1030
  }
1031
  ],
1032
  "logging_steps": 3,
@@ -1041,12 +1111,12 @@
1041
  "should_evaluate": false,
1042
  "should_log": false,
1043
  "should_save": true,
1044
- "should_training_stop": false
1045
  },
1046
  "attributes": {}
1047
  }
1048
  },
1049
- "total_flos": 1.6247899734147072e+18,
1050
  "train_batch_size": 8,
1051
  "trial_name": null,
1052
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 3.0035087719298246,
5
  "eval_steps": 36,
6
+ "global_step": 428,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1027
  "eval_samples_per_second": 5.022,
1028
  "eval_steps_per_second": 0.628,
1029
  "step": 396
1030
+ },
1031
+ {
1032
+ "epoch": 2.8,
1033
+ "grad_norm": 3.4699087142944336,
1034
+ "learning_rate": 5.914706936587494e-07,
1035
+ "loss": 0.614,
1036
+ "step": 399
1037
+ },
1038
+ {
1039
+ "epoch": 2.8210526315789473,
1040
+ "grad_norm": 2.6950035095214844,
1041
+ "learning_rate": 4.75796766313269e-07,
1042
+ "loss": 0.9641,
1043
+ "step": 402
1044
+ },
1045
+ {
1046
+ "epoch": 2.8421052631578947,
1047
+ "grad_norm": 4.25594425201416,
1048
+ "learning_rate": 3.7258996066258103e-07,
1049
+ "loss": 0.736,
1050
+ "step": 405
1051
+ },
1052
+ {
1053
+ "epoch": 2.863157894736842,
1054
+ "grad_norm": 3.8812239170074463,
1055
+ "learning_rate": 2.819027429088822e-07,
1056
+ "loss": 0.7287,
1057
+ "step": 408
1058
+ },
1059
+ {
1060
+ "epoch": 2.8842105263157896,
1061
+ "grad_norm": 4.651484966278076,
1062
+ "learning_rate": 2.0378121479783796e-07,
1063
+ "loss": 0.8938,
1064
+ "step": 411
1065
+ },
1066
+ {
1067
+ "epoch": 2.905263157894737,
1068
+ "grad_norm": 4.784148216247559,
1069
+ "learning_rate": 1.3826509018227128e-07,
1070
+ "loss": 0.9602,
1071
+ "step": 414
1072
+ },
1073
+ {
1074
+ "epoch": 2.9263157894736844,
1075
+ "grad_norm": 4.499444007873535,
1076
+ "learning_rate": 8.538767483325383e-08,
1077
+ "loss": 0.985,
1078
+ "step": 417
1079
+ },
1080
+ {
1081
+ "epoch": 2.9473684210526314,
1082
+ "grad_norm": 5.214015483856201,
1083
+ "learning_rate": 4.517584950877452e-08,
1084
+ "loss": 0.9054,
1085
+ "step": 420
1086
+ },
1087
+ {
1088
+ "epoch": 2.968421052631579,
1089
+ "grad_norm": 3.8694188594818115,
1090
+ "learning_rate": 1.7650056288651127e-08,
1091
+ "loss": 0.651,
1092
+ "step": 423
1093
+ },
1094
+ {
1095
+ "epoch": 2.9894736842105263,
1096
+ "grad_norm": 3.8104214668273926,
1097
+ "learning_rate": 2.8242881825846223e-09,
1098
+ "loss": 0.8252,
1099
+ "step": 426
1100
  }
1101
  ],
1102
  "logging_steps": 3,
 
1111
  "should_evaluate": false,
1112
  "should_log": false,
1113
  "should_save": true,
1114
+ "should_training_stop": true
1115
  },
1116
  "attributes": {}
1117
  }
1118
  },
1119
+ "total_flos": 1.7558214228836352e+18,
1120
  "train_batch_size": 8,
1121
  "trial_name": null,
1122
  "trial_params": null