Whispful commited on
Commit
7d42fb7
·
verified ·
1 Parent(s): 598e300

Training in progress, step 166, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d7a4badba070813dd4dffb29191acb613764e44d167a24151aa79e92b18167bd
3
  size 335604696
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4f2c775e4545903835c7d87abda98c1a26cbf4701f00d7baddc13d768e2697ff
3
  size 335604696
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6ca5c092981a6a8c077240e302146e7134995fe3fd122344acd6dd77967b9925
3
  size 671466706
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:25c8963ce2ba916ef8eab829306053aacb2d91470fc8a1676d098d2665cd90e9
3
  size 671466706
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6134ac16e34c98d433ebbee4c57656cf7fa4d7418acdba1cf90ca8f2fc421ff1
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bc0b191b4797e200d61c94545745999fce4bd5da0439819c3a09163bf991f35f
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:442d35fdd8f035149dd189332292077851133dce57ad65477bd773e133f2c810
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5b2ce6896037c31b7758572f9a2fb930fa6d093ae265425c5633672a67c2eba0
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 0.06985995918512344,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-150",
4
- "epoch": 2.229447282861124,
5
  "eval_steps": 25,
6
- "global_step": 150,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1113,6 +1113,118 @@
1113
  "eval_samples_per_second": 3.984,
1114
  "eval_steps_per_second": 3.984,
1115
  "step": 150
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1116
  }
1117
  ],
1118
  "logging_steps": 1,
@@ -1136,12 +1248,12 @@
1136
  "should_evaluate": false,
1137
  "should_log": false,
1138
  "should_save": true,
1139
- "should_training_stop": false
1140
  },
1141
  "attributes": {}
1142
  }
1143
  },
1144
- "total_flos": 4.4863203567049114e+17,
1145
  "train_batch_size": 1,
1146
  "trial_name": null,
1147
  "trial_params": null
 
1
  {
2
  "best_metric": 0.06985995918512344,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-150",
4
+ "epoch": 2.4672549930329772,
5
  "eval_steps": 25,
6
+ "global_step": 166,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1113
  "eval_samples_per_second": 3.984,
1114
  "eval_steps_per_second": 3.984,
1115
  "step": 150
1116
+ },
1117
+ {
1118
+ "epoch": 2.244310264746865,
1119
+ "grad_norm": 0.31291377544403076,
1120
+ "learning_rate": 4.794673694696306e-06,
1121
+ "loss": 0.0571,
1122
+ "step": 151
1123
+ },
1124
+ {
1125
+ "epoch": 2.2591732466326055,
1126
+ "grad_norm": 0.3577059805393219,
1127
+ "learning_rate": 4.692911722106433e-06,
1128
+ "loss": 0.0726,
1129
+ "step": 152
1130
+ },
1131
+ {
1132
+ "epoch": 2.2740362285183466,
1133
+ "grad_norm": 0.4630509614944458,
1134
+ "learning_rate": 4.597991941755332e-06,
1135
+ "loss": 0.0784,
1136
+ "step": 153
1137
+ },
1138
+ {
1139
+ "epoch": 2.2888992104040873,
1140
+ "grad_norm": 0.4654407501220703,
1141
+ "learning_rate": 4.50995187927262e-06,
1142
+ "loss": 0.081,
1143
+ "step": 154
1144
+ },
1145
+ {
1146
+ "epoch": 2.3037621922898284,
1147
+ "grad_norm": 0.532038152217865,
1148
+ "learning_rate": 4.428826340457088e-06,
1149
+ "loss": 0.0843,
1150
+ "step": 155
1151
+ },
1152
+ {
1153
+ "epoch": 2.318625174175569,
1154
+ "grad_norm": 0.4133490324020386,
1155
+ "learning_rate": 4.354647397516551e-06,
1156
+ "loss": 0.0635,
1157
+ "step": 156
1158
+ },
1159
+ {
1160
+ "epoch": 2.3334881560613097,
1161
+ "grad_norm": 0.49163010716438293,
1162
+ "learning_rate": 4.287444376388429e-06,
1163
+ "loss": 0.0773,
1164
+ "step": 157
1165
+ },
1166
+ {
1167
+ "epoch": 2.3483511379470507,
1168
+ "grad_norm": 0.4378342628479004,
1169
+ "learning_rate": 4.227243845146e-06,
1170
+ "loss": 0.0575,
1171
+ "step": 158
1172
+ },
1173
+ {
1174
+ "epoch": 2.3632141198327914,
1175
+ "grad_norm": 0.5189927220344543,
1176
+ "learning_rate": 4.174069603494967e-06,
1177
+ "loss": 0.0468,
1178
+ "step": 159
1179
+ },
1180
+ {
1181
+ "epoch": 2.3780771017185325,
1182
+ "grad_norm": 0.3820410370826721,
1183
+ "learning_rate": 4.127942673364479e-06,
1184
+ "loss": 0.0488,
1185
+ "step": 160
1186
+ },
1187
+ {
1188
+ "epoch": 2.392940083604273,
1189
+ "grad_norm": 0.3878793716430664,
1190
+ "learning_rate": 4.088881290596307e-06,
1191
+ "loss": 0.0475,
1192
+ "step": 161
1193
+ },
1194
+ {
1195
+ "epoch": 2.4078030654900138,
1196
+ "grad_norm": 0.34497255086898804,
1197
+ "learning_rate": 4.0569008977354756e-06,
1198
+ "loss": 0.0365,
1199
+ "step": 162
1200
+ },
1201
+ {
1202
+ "epoch": 2.422666047375755,
1203
+ "grad_norm": 0.32925593852996826,
1204
+ "learning_rate": 4.032014137925207e-06,
1205
+ "loss": 0.0354,
1206
+ "step": 163
1207
+ },
1208
+ {
1209
+ "epoch": 2.4375290292614955,
1210
+ "grad_norm": 0.37827885150909424,
1211
+ "learning_rate": 4.014230849908567e-06,
1212
+ "loss": 0.0354,
1213
+ "step": 164
1214
+ },
1215
+ {
1216
+ "epoch": 2.4523920111472366,
1217
+ "grad_norm": 0.31829050183296204,
1218
+ "learning_rate": 4.003558064138821e-06,
1219
+ "loss": 0.033,
1220
+ "step": 165
1221
+ },
1222
+ {
1223
+ "epoch": 2.4672549930329772,
1224
+ "grad_norm": 0.4024122655391693,
1225
+ "learning_rate": 4.000000000000001e-06,
1226
+ "loss": 0.0324,
1227
+ "step": 166
1228
  }
1229
  ],
1230
  "logging_steps": 1,
 
1248
  "should_evaluate": false,
1249
  "should_log": false,
1250
  "should_save": true,
1251
+ "should_training_stop": true
1252
  },
1253
  "attributes": {}
1254
  }
1255
  },
1256
+ "total_flos": 4.965632072220672e+17,
1257
  "train_batch_size": 1,
1258
  "trial_name": null,
1259
  "trial_params": null