RoyJoy committed (verified)
Commit: 2d028c9
Parent: 58ecb53

Training in progress, step 50, checkpoint
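For context, the files below are the usual contents of a transformers Trainer checkpoint folder (adapter weights, optimizer, per-rank RNG states, scheduler, trainer state). A minimal, hypothetical sketch of resuming a run from this last-checkpoint directory follows; only eval_steps=25, train_batch_size=2, and the stop at step 50 are taken from the trainer_state.json diff below, everything else (output_dir, save cadence, model and data wiring) is an assumption, not this repository's actual training script.

# Hypothetical resume sketch; hyperparameters marked below are assumptions.
from transformers import Trainer, TrainingArguments

args = TrainingArguments(
    output_dir="miner_id_24",        # best_model_checkpoint below points into this directory
    per_device_train_batch_size=2,   # "train_batch_size": 2 recorded in trainer_state.json
    eval_steps=25,                   # "eval_steps": 25 recorded in trainer_state.json
    save_steps=25,                   # assumption: checkpoints every 25 steps (checkpoint-25, checkpoint-50)
    max_steps=50,                    # run stops at global_step 50 ("should_training_stop": true)
)
# model, train_dataset and eval_dataset are assumed to be defined elsewhere:
# trainer = Trainer(model=model, args=args, train_dataset=train_ds, eval_dataset=eval_ds)
# trainer.train(resume_from_checkpoint="last-checkpoint")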
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:5c23385efa3fc955db2fff44d3db23a01911ec422e51ad2d424feeb61c661899
+ oid sha256:e1e4e563e0fb719fd954110f793016a75240aae68bec4a466068b9ac9a16f53a
  size 335604696
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:a8b362f1ba0221f89f41d336bf1b9606120164cef605cf9911b172ce12a3f648
+ oid sha256:488ed8b15db5016098693e2adfd70de76b0d7c338d0f05c5308ccd3ba2f51f36
  size 671466706
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:2abc4fdd8cb56fd00022ed88191faa18e1480506d1d27bfd3ba0d3f67f348b05
+ oid sha256:c8e58e3ea04cfdd0cf876acb246fc92a1b5db6339901eb55ca6cb462639cf77b
  size 15024
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:9620e65ba759e84fe497b9913b32b878de30259f05dce4a6e4bb8378e19a64ee
+ oid sha256:f80a318932d5a5365c8572db2538b64fc8c6d187678736ec5b43b421fa8daf07
  size 15024
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:cf07919d7c2ea628f5a103dcb60df086f1a06bbf7048cc92dce48b51087bc00a
+ oid sha256:6a106cfc9f6e15ffe5207fdea7ef40b48af5e5e0fb6265f1f8f421b1f96ecfff
  size 15024
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:9a5229f138ebfdf512957fabcfbb40f1ed6738724079ef1ae986d222f3455c12
+ oid sha256:0fde3ca88e88bba4acc0257b74e8363297652842185389072df73b8952fa889c
  size 15024
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:f37b2aa490ccb1598b01e14cda36e9081f7ce646deab4d3c2d03de0d2169a755
+ oid sha256:b1df0528620c07325b8faa7567e59b0c1e86a1f1ee6af1245a69c6c0463fe4e2
  size 1064
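Each of the files above is stored as a Git LFS pointer: the `oid sha256:` line is the SHA-256 digest of the actual file contents and `size` is its byte count. A minimal sketch for checking a downloaded file against its pointer follows; the local path is an assumption, and the expected digest shown is the new adapter_model.safetensors oid from this commit.

# Minimal sketch: verify an LFS-tracked file against the sha256 oid in its pointer.
import hashlib

def sha256_of(path, chunk_size=1 << 20):
    # Stream the file in 1 MiB chunks so large checkpoints do not need to fit in memory.
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            h.update(chunk)
    return h.hexdigest()

expected = "e1e4e563e0fb719fd954110f793016a75240aae68bec4a466068b9ac9a16f53a"
print(sha256_of("last-checkpoint/adapter_model.safetensors") == expected)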
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
  {
- "best_metric": 2.399475336074829,
- "best_model_checkpoint": "miner_id_24/checkpoint-25",
- "epoch": 0.013121207151057898,
+ "best_metric": 2.353839635848999,
+ "best_model_checkpoint": "miner_id_24/checkpoint-50",
+ "epoch": 0.026242414302115796,
  "eval_steps": 25,
- "global_step": 25,
+ "global_step": 50,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -198,6 +198,189 @@
  "eval_samples_per_second": 35.071,
  "eval_steps_per_second": 4.385,
  "step": 25
+ },
+ {
+ "epoch": 0.013646055437100213,
+ "grad_norm": 0.433169960975647,
+ "learning_rate": 5e-05,
+ "loss": 2.3497,
+ "step": 26
+ },
+ {
+ "epoch": 0.014170903723142529,
+ "grad_norm": 0.44894787669181824,
+ "learning_rate": 4.6729843538492847e-05,
+ "loss": 2.3805,
+ "step": 27
+ },
+ {
+ "epoch": 0.014695752009184845,
+ "grad_norm": 0.48380163311958313,
+ "learning_rate": 4.347369038899744e-05,
+ "loss": 2.4482,
+ "step": 28
+ },
+ {
+ "epoch": 0.015220600295227161,
+ "grad_norm": 0.41069453954696655,
+ "learning_rate": 4.0245483899193595e-05,
+ "loss": 2.424,
+ "step": 29
+ },
+ {
+ "epoch": 0.015745448581269476,
+ "grad_norm": 0.3481389582157135,
+ "learning_rate": 3.705904774487396e-05,
+ "loss": 2.3664,
+ "step": 30
+ },
+ {
+ "epoch": 0.016270296867311792,
+ "grad_norm": 0.3449362814426422,
+ "learning_rate": 3.392802673484193e-05,
+ "loss": 2.2724,
+ "step": 31
+ },
+ {
+ "epoch": 0.016795145153354108,
+ "grad_norm": 0.3418646454811096,
+ "learning_rate": 3.086582838174551e-05,
+ "loss": 2.3587,
+ "step": 32
+ },
+ {
+ "epoch": 0.017319993439396424,
+ "grad_norm": 0.4137051999568939,
+ "learning_rate": 2.7885565489049946e-05,
+ "loss": 2.2393,
+ "step": 33
+ },
+ {
+ "epoch": 0.01784484172543874,
+ "grad_norm": 0.3769303560256958,
+ "learning_rate": 2.500000000000001e-05,
+ "loss": 2.4014,
+ "step": 34
+ },
+ {
+ "epoch": 0.018369690011481057,
+ "grad_norm": 0.44172248244285583,
+ "learning_rate": 2.2221488349019903e-05,
+ "loss": 2.2937,
+ "step": 35
+ },
+ {
+ "epoch": 0.018894538297523373,
+ "grad_norm": 0.46419256925582886,
+ "learning_rate": 1.9561928549563968e-05,
+ "loss": 2.4315,
+ "step": 36
+ },
+ {
+ "epoch": 0.01941938658356569,
+ "grad_norm": 0.6354751586914062,
+ "learning_rate": 1.703270924499656e-05,
+ "loss": 2.5355,
+ "step": 37
+ },
+ {
+ "epoch": 0.019944234869608005,
+ "grad_norm": 0.3062434196472168,
+ "learning_rate": 1.4644660940672627e-05,
+ "loss": 2.4477,
+ "step": 38
+ },
+ {
+ "epoch": 0.02046908315565032,
+ "grad_norm": 0.29478734731674194,
+ "learning_rate": 1.2408009626051137e-05,
+ "loss": 2.3797,
+ "step": 39
+ },
+ {
+ "epoch": 0.020993931441692634,
+ "grad_norm": 0.27740278840065,
+ "learning_rate": 1.0332332985438248e-05,
+ "loss": 2.2876,
+ "step": 40
+ },
+ {
+ "epoch": 0.02151877972773495,
+ "grad_norm": 0.28595972061157227,
+ "learning_rate": 8.426519384872733e-06,
+ "loss": 2.3316,
+ "step": 41
+ },
+ {
+ "epoch": 0.022043628013777267,
+ "grad_norm": 0.36039939522743225,
+ "learning_rate": 6.698729810778065e-06,
+ "loss": 2.3168,
+ "step": 42
+ },
+ {
+ "epoch": 0.022568476299819583,
+ "grad_norm": 0.2989155650138855,
+ "learning_rate": 5.156362923365588e-06,
+ "loss": 2.3838,
+ "step": 43
+ },
+ {
+ "epoch": 0.0230933245858619,
+ "grad_norm": 0.32794296741485596,
+ "learning_rate": 3.8060233744356633e-06,
+ "loss": 2.3061,
+ "step": 44
+ },
+ {
+ "epoch": 0.023618172871904215,
+ "grad_norm": 0.34658685326576233,
+ "learning_rate": 2.653493525244721e-06,
+ "loss": 2.227,
+ "step": 45
+ },
+ {
+ "epoch": 0.02414302115794653,
+ "grad_norm": 0.37640607357025146,
+ "learning_rate": 1.70370868554659e-06,
+ "loss": 2.2494,
+ "step": 46
+ },
+ {
+ "epoch": 0.024667869443988848,
+ "grad_norm": 0.3610791265964508,
+ "learning_rate": 9.607359798384785e-07,
+ "loss": 2.3854,
+ "step": 47
+ },
+ {
+ "epoch": 0.025192717730031164,
+ "grad_norm": 0.39148759841918945,
+ "learning_rate": 4.277569313094809e-07,
+ "loss": 2.403,
+ "step": 48
+ },
+ {
+ "epoch": 0.02571756601607348,
+ "grad_norm": 0.4699634313583374,
+ "learning_rate": 1.0705383806982606e-07,
+ "loss": 2.3342,
+ "step": 49
+ },
+ {
+ "epoch": 0.026242414302115796,
+ "grad_norm": 0.7783499360084534,
+ "learning_rate": 0.0,
+ "loss": 2.4725,
+ "step": 50
+ },
+ {
+ "epoch": 0.026242414302115796,
+ "eval_loss": 2.353839635848999,
+ "eval_runtime": 366.0989,
+ "eval_samples_per_second": 35.062,
+ "eval_steps_per_second": 4.384,
+ "step": 50
  }
  ],
  "logging_steps": 1,
@@ -221,12 +404,12 @@
  "should_evaluate": false,
  "should_log": false,
  "should_save": true,
- "should_training_stop": false
+ "should_training_stop": true
  },
  "attributes": {}
  }
  },
- "total_flos": 2.9859068337822106e+17,
+ "total_flos": 5.969948642440643e+17,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null