DeepDream2045 committed on
Commit
825d380
1 Parent(s): dcc42d5

Training in progress, step 50, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b9958677d822a74081551e0f85a233c1700f4bd4e21bb1131276e8839c0dac1a
3
  size 500770656
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e5709aeefa693f7d5b93b7c49854fffea031f4318bcaaf89b4e8725366141ec3
3
  size 500770656
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:39af447e8ee3b88f5378037bec15dce3370626bf41d6872b2dd622e8d88c4b52
3
  size 1001863522
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2ae786c9f627e2523fed346d92f928c04a7aa4913ff7e8304fe52f309eced9de
3
  size 1001863522
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dc7d5f81cc1567f8e0ffa0cab8835988dd15e7ecaeeac9139b16c8d96859ffca
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b474e5149b21f64a1ce0c5942810d1ace65abf10ebe5ee2b10eb7f5614c91f94
3
  size 15024
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4ef3e09cdc45bd74f2d86972e74a57c56ea7533589ef2f7ab26b438ccb78f200
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d8cd03d1e0bf00d787a45dba164e732e4b07efa0c8134bfaa649d7c7ba50c9c7
3
  size 15024
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a9e6f39983e1f416171a756b503a109fed1e35e906c834d01839ab332909c641
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:24e754ffa126a571d843e9b1c1f1d2ebc5e7c1ee0db360328af6d94b4f51714d
3
  size 15024
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b88548de3fe89a2a72d298b03763bf6032778780e5330229294c7f07b9421e64
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7bde6f0d7dc60d5465bc71ebf9276915461cb7742722ce104dcdc397e2e84228
3
  size 15024
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f37b2aa490ccb1598b01e14cda36e9081f7ce646deab4d3c2d03de0d2169a755
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b1df0528620c07325b8faa7567e59b0c1e86a1f1ee6af1245a69c6c0463fe4e2
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 0.09957851469516754,
3
- "best_model_checkpoint": "miner_id_24/checkpoint-25",
4
- "epoch": 0.6015037593984962,
5
  "eval_steps": 25,
6
- "global_step": 25,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -198,6 +198,189 @@
198
  "eval_samples_per_second": 20.691,
199
  "eval_steps_per_second": 2.586,
200
  "step": 25
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
201
  }
202
  ],
203
  "logging_steps": 1,
@@ -221,12 +404,12 @@
221
  "should_evaluate": false,
222
  "should_log": false,
223
  "should_save": true,
224
- "should_training_stop": false
225
  },
226
  "attributes": {}
227
  }
228
  },
229
- "total_flos": 5.102844201926656e+17,
230
  "train_batch_size": 2,
231
  "trial_name": null,
232
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.09947212040424347,
3
+ "best_model_checkpoint": "miner_id_24/checkpoint-50",
4
+ "epoch": 1.2045112781954888,
5
  "eval_steps": 25,
6
+ "global_step": 50,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
198
  "eval_samples_per_second": 20.691,
199
  "eval_steps_per_second": 2.586,
200
  "step": 25
201
+ },
202
+ {
203
+ "epoch": 0.6255639097744361,
204
+ "grad_norm": 0.02670644409954548,
205
+ "learning_rate": 5e-05,
206
+ "loss": 0.0999,
207
+ "step": 26
208
+ },
209
+ {
210
+ "epoch": 0.649624060150376,
211
+ "grad_norm": 0.02877388522028923,
212
+ "learning_rate": 4.6729843538492847e-05,
213
+ "loss": 0.0983,
214
+ "step": 27
215
+ },
216
+ {
217
+ "epoch": 0.6736842105263158,
218
+ "grad_norm": 0.031500719487667084,
219
+ "learning_rate": 4.347369038899744e-05,
220
+ "loss": 0.096,
221
+ "step": 28
222
+ },
223
+ {
224
+ "epoch": 0.6977443609022557,
225
+ "grad_norm": 0.048165999352931976,
226
+ "learning_rate": 4.0245483899193595e-05,
227
+ "loss": 0.0958,
228
+ "step": 29
229
+ },
230
+ {
231
+ "epoch": 0.7218045112781954,
232
+ "grad_norm": 0.09344542026519775,
233
+ "learning_rate": 3.705904774487396e-05,
234
+ "loss": 0.0965,
235
+ "step": 30
236
+ },
237
+ {
238
+ "epoch": 0.7458646616541353,
239
+ "grad_norm": 0.12452530115842819,
240
+ "learning_rate": 3.392802673484193e-05,
241
+ "loss": 0.0986,
242
+ "step": 31
243
+ },
244
+ {
245
+ "epoch": 0.7699248120300752,
246
+ "grad_norm": 0.12313531339168549,
247
+ "learning_rate": 3.086582838174551e-05,
248
+ "loss": 0.1015,
249
+ "step": 32
250
+ },
251
+ {
252
+ "epoch": 0.793984962406015,
253
+ "grad_norm": 0.1442219614982605,
254
+ "learning_rate": 2.7885565489049946e-05,
255
+ "loss": 0.105,
256
+ "step": 33
257
+ },
258
+ {
259
+ "epoch": 0.8180451127819549,
260
+ "grad_norm": 0.06111603602766991,
261
+ "learning_rate": 2.500000000000001e-05,
262
+ "loss": 0.1014,
263
+ "step": 34
264
+ },
265
+ {
266
+ "epoch": 0.8421052631578947,
267
+ "grad_norm": 0.027853839099407196,
268
+ "learning_rate": 2.2221488349019903e-05,
269
+ "loss": 0.0995,
270
+ "step": 35
271
+ },
272
+ {
273
+ "epoch": 0.8661654135338346,
274
+ "grad_norm": 0.13428233563899994,
275
+ "learning_rate": 1.9561928549563968e-05,
276
+ "loss": 0.1005,
277
+ "step": 36
278
+ },
279
+ {
280
+ "epoch": 0.8902255639097745,
281
+ "grad_norm": 0.12706559896469116,
282
+ "learning_rate": 1.703270924499656e-05,
283
+ "loss": 0.1006,
284
+ "step": 37
285
+ },
286
+ {
287
+ "epoch": 0.9142857142857143,
288
+ "grad_norm": 0.15370112657546997,
289
+ "learning_rate": 1.4644660940672627e-05,
290
+ "loss": 0.1004,
291
+ "step": 38
292
+ },
293
+ {
294
+ "epoch": 0.9383458646616541,
295
+ "grad_norm": 0.1685517281293869,
296
+ "learning_rate": 1.2408009626051137e-05,
297
+ "loss": 0.1,
298
+ "step": 39
299
+ },
300
+ {
301
+ "epoch": 0.9624060150375939,
302
+ "grad_norm": 0.06856564432382584,
303
+ "learning_rate": 1.0332332985438248e-05,
304
+ "loss": 0.0956,
305
+ "step": 40
306
+ },
307
+ {
308
+ "epoch": 0.9864661654135338,
309
+ "grad_norm": 0.1981419026851654,
310
+ "learning_rate": 8.426519384872733e-06,
311
+ "loss": 0.097,
312
+ "step": 41
313
+ },
314
+ {
315
+ "epoch": 1.0120300751879698,
316
+ "grad_norm": 0.07202955335378647,
317
+ "learning_rate": 6.698729810778065e-06,
318
+ "loss": 0.1529,
319
+ "step": 42
320
+ },
321
+ {
322
+ "epoch": 1.0360902255639097,
323
+ "grad_norm": 0.030848056077957153,
324
+ "learning_rate": 5.156362923365588e-06,
325
+ "loss": 0.101,
326
+ "step": 43
327
+ },
328
+ {
329
+ "epoch": 1.0601503759398496,
330
+ "grad_norm": 0.03846440464258194,
331
+ "learning_rate": 3.8060233744356633e-06,
332
+ "loss": 0.1012,
333
+ "step": 44
334
+ },
335
+ {
336
+ "epoch": 1.0842105263157895,
337
+ "grad_norm": 0.036180026829242706,
338
+ "learning_rate": 2.653493525244721e-06,
339
+ "loss": 0.1004,
340
+ "step": 45
341
+ },
342
+ {
343
+ "epoch": 1.1082706766917294,
344
+ "grad_norm": 0.025751329958438873,
345
+ "learning_rate": 1.70370868554659e-06,
346
+ "loss": 0.0993,
347
+ "step": 46
348
+ },
349
+ {
350
+ "epoch": 1.132330827067669,
351
+ "grad_norm": 0.06255143880844116,
352
+ "learning_rate": 9.607359798384785e-07,
353
+ "loss": 0.0998,
354
+ "step": 47
355
+ },
356
+ {
357
+ "epoch": 1.156390977443609,
358
+ "grad_norm": 0.04273088276386261,
359
+ "learning_rate": 4.277569313094809e-07,
360
+ "loss": 0.0987,
361
+ "step": 48
362
+ },
363
+ {
364
+ "epoch": 1.1804511278195489,
365
+ "grad_norm": 0.02951870672404766,
366
+ "learning_rate": 1.0705383806982606e-07,
367
+ "loss": 0.0958,
368
+ "step": 49
369
+ },
370
+ {
371
+ "epoch": 1.2045112781954888,
372
+ "grad_norm": 0.03614836931228638,
373
+ "learning_rate": 0.0,
374
+ "loss": 0.0964,
375
+ "step": 50
376
+ },
377
+ {
378
+ "epoch": 1.2045112781954888,
379
+ "eval_loss": 0.09947212040424347,
380
+ "eval_runtime": 13.5385,
381
+ "eval_samples_per_second": 20.682,
382
+ "eval_steps_per_second": 2.585,
383
+ "step": 50
384
  }
385
  ],
386
  "logging_steps": 1,
 
404
  "should_evaluate": false,
405
  "should_log": false,
406
  "should_save": true,
407
+ "should_training_stop": true
408
  },
409
  "attributes": {}
410
  }
411
  },
412
+ "total_flos": 1.0205688403853312e+18,
413
  "train_batch_size": 2,
414
  "trial_name": null,
415
  "trial_params": null