Rodo-Sami committed
Commit 1d1f9c5
1 Parent(s): 2f33d6d

Training in progress, step 35, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:79cc6a18876559809e4a724347b95a86bda27f1f865497c98e331ca191c5396e
+ oid sha256:0db147ebb62417f81519e92d1b096573887b29755593ccce42d03cfada4e8191
  size 432223744
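
The adapter_model.safetensors file replaced above is the standard name PEFT uses for saved adapter weights. As a minimal sketch only (not part of this commit), the snippet below shows how such an adapter could be loaded on top of its base model; the base model identifier is a placeholder, since the commit does not name it, and it assumes last-checkpoint/ also contains the usual adapter_config.json.

# Minimal sketch, assuming last-checkpoint/ is a PEFT adapter directory
# (adapter_config.json + adapter_model.safetensors). Base model name is a placeholder.
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

BASE_MODEL = "base-model-name"  # placeholder: not specified in this commit

base = AutoModelForCausalLM.from_pretrained(BASE_MODEL)
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
model = PeftModel.from_pretrained(base, "last-checkpoint")  # loads adapter_model.safetensors
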
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:fc258936e5ed6574cd7a789672d7acbbfdd3deea00a8ce16c126006b1600bb3f
+ oid sha256:e1494c90d577ac674719fcc3483fd15c82e59f5d9b9290d2b5c0144f79a43fe1
  size 864785974
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:9f7e416783ded80e754d433ee54d8ada7de014aade71da08382234de5539d9b5
+ oid sha256:983ffc8d80e0f89eb6c032f40fbde1c4ea7470dcda8f2d1ff67029065309fbf4
  size 15024
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:f95ca7e75522f7ca7ffa31c80e048b0980d775d98cbd9c8bf1f97643a423c957
+ oid sha256:1cef4a0f369ece5771a253d7d41b5911f46a68f493521964477ab6fe637f1da3
  size 15024
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:b3048cb4a0da77acd359ab208c5b6249130428d9a657329f6bd012934712558a
+ oid sha256:883566e95d53136cb99377aab4cac05d583a73b85430a197a9f587f439a55635
  size 15024
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:6cb81ea9f530fec5dd45037e557013ae6c30b722e432cc6cd443d9400270cadb
+ oid sha256:c8036c139d9fdac2a864f5c9e2869f51f3bd93e9bd3b5deded9265e6135dedf1
  size 15024
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:42c052e81d046347be3074b07671b8743acfee2fa86afcaf4cf737a53f8e085c
+ oid sha256:400690333b498d3c3ea4cd1446f834c1cb37202647b1e68e58a488ea7599ce2b
  size 1064
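
Every file above is stored as a Git LFS pointer: a small text file recording only the blob's SHA-256 oid and size, not the binary itself. As a rough illustration (the paths below are placeholders, not files created by this commit), a downloaded blob can be checked against its pointer like this:

# Rough sketch: parse a Git LFS pointer and verify a locally downloaded blob.
import hashlib

def read_lfs_pointer(path):
    # Pointer files are "key value" lines, e.g. "oid sha256:<hex>" and "size <bytes>".
    fields = {}
    with open(path, "r", encoding="utf-8") as fh:
        for line in fh:
            key, _, value = line.strip().partition(" ")
            fields[key] = value
    return fields

def sha256_of(path, chunk_size=1 << 20):
    # Stream the blob so large checkpoints do not have to fit in memory.
    digest = hashlib.sha256()
    with open(path, "rb") as fh:
        for chunk in iter(lambda: fh.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()

pointer = read_lfs_pointer("optimizer.pt.lfs-pointer")   # placeholder path to the pointer text
expected = pointer["oid"].split(":", 1)[1]               # strip the "sha256:" prefix
print("match:", expected == sha256_of("optimizer.pt"))   # placeholder path to the blob
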
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
  {
  "best_metric": 0.7776047587394714,
  "best_model_checkpoint": "miner_id_24/checkpoint-25",
- "epoch": 1.4652014652014653,
+ "epoch": 2.051282051282051,
  "eval_steps": 25,
- "global_step": 25,
+ "global_step": 35,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -198,6 +198,76 @@
  "eval_samples_per_second": 19.549,
  "eval_steps_per_second": 1.275,
  "step": 25
+ },
+ {
+ "epoch": 1.5238095238095237,
+ "grad_norm": 0.7313464879989624,
+ "learning_rate": 1.725696330273575e-05,
+ "loss": 0.7806,
+ "step": 26
+ },
+ {
+ "epoch": 1.5824175824175826,
+ "grad_norm": 0.6874339580535889,
+ "learning_rate": 1.3813298094746491e-05,
+ "loss": 0.744,
+ "step": 27
+ },
+ {
+ "epoch": 1.641025641025641,
+ "grad_norm": 0.6659790277481079,
+ "learning_rate": 1.0697345262860636e-05,
+ "loss": 0.8011,
+ "step": 28
+ },
+ {
+ "epoch": 1.6996336996336996,
+ "grad_norm": 0.8043178915977478,
+ "learning_rate": 7.937323358440935e-06,
+ "loss": 0.8361,
+ "step": 29
+ },
+ {
+ "epoch": 1.7582417582417582,
+ "grad_norm": 0.5227417349815369,
+ "learning_rate": 5.558227567253832e-06,
+ "loss": 0.7979,
+ "step": 30
+ },
+ {
+ "epoch": 1.8168498168498168,
+ "grad_norm": 0.5979397892951965,
+ "learning_rate": 3.581603349196372e-06,
+ "loss": 0.7131,
+ "step": 31
+ },
+ {
+ "epoch": 1.8754578754578755,
+ "grad_norm": 0.43952739238739014,
+ "learning_rate": 2.0253513192751373e-06,
+ "loss": 0.7307,
+ "step": 32
+ },
+ {
+ "epoch": 1.934065934065934,
+ "grad_norm": 0.8428590297698975,
+ "learning_rate": 9.035651368646648e-07,
+ "loss": 0.7661,
+ "step": 33
+ },
+ {
+ "epoch": 1.9926739926739927,
+ "grad_norm": 0.6554544568061829,
+ "learning_rate": 2.2640387134577058e-07,
+ "loss": 0.8265,
+ "step": 34
+ },
+ {
+ "epoch": 2.051282051282051,
+ "grad_norm": 2.641814708709717,
+ "learning_rate": 0.0,
+ "loss": 1.6606,
+ "step": 35
  }
  ],
  "logging_steps": 1,
@@ -221,12 +291,12 @@
  "should_evaluate": false,
  "should_log": false,
  "should_save": true,
- "should_training_stop": false
+ "should_training_stop": true
  },
  "attributes": {}
  }
  },
- "total_flos": 6.631381723512832e+17,
+ "total_flos": 9.283934412917965e+17,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null