dimasik1987 commited on
Commit
2a4ac4c
1 Parent(s): 4a5e387

Training in progress, step 50, checkpoint

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:caa0eaee5cedb377fe1261e85d128185b90db08b38266c4efca995ed2dccaee7
3
  size 671466706
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3c6c3b49517918f71b5a8df86ce64802524c101812f6310fcee02fef5d317af6
3
  size 671466706
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0d1ed0d05f98b7f3f40accdab65ca1d1e070ed54fcbe788400dbc49cadd189e5
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cadd65cd84f71c9af827b83eda5019a7e53557ca55a0ba59402e7753460dc671
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6676fe28230ae15b45fb334c871c6fdf1a7984a935952b9f8650896c37a8c106
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b1df0528620c07325b8faa7567e59b0c1e86a1f1ee6af1245a69c6c0463fe4e2
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.012394645513138325,
5
  "eval_steps": 4,
6
- "global_step": 25,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -238,6 +238,229 @@
238
  "learning_rate": 6.91341716182545e-05,
239
  "loss": 0.0,
240
  "step": 25
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
241
  }
242
  ],
243
  "logging_steps": 1,
@@ -252,12 +475,12 @@
252
  "should_evaluate": false,
253
  "should_log": false,
254
  "should_save": true,
255
- "should_training_stop": false
256
  },
257
  "attributes": {}
258
  }
259
  },
260
- "total_flos": 1.119015678246912e+17,
261
  "train_batch_size": 4,
262
  "trial_name": null,
263
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.02478929102627665,
5
  "eval_steps": 4,
6
+ "global_step": 50,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
238
  "learning_rate": 6.91341716182545e-05,
239
  "loss": 0.0,
240
  "step": 25
241
+ },
242
+ {
243
+ "epoch": 0.012890431333663858,
244
+ "grad_norm": NaN,
245
+ "learning_rate": 6.545084971874738e-05,
246
+ "loss": 0.0,
247
+ "step": 26
248
+ },
249
+ {
250
+ "epoch": 0.013386217154189391,
251
+ "grad_norm": NaN,
252
+ "learning_rate": 6.167226819279528e-05,
253
+ "loss": 0.0,
254
+ "step": 27
255
+ },
256
+ {
257
+ "epoch": 0.013882002974714923,
258
+ "grad_norm": NaN,
259
+ "learning_rate": 5.782172325201155e-05,
260
+ "loss": 0.0,
261
+ "step": 28
262
+ },
263
+ {
264
+ "epoch": 0.013882002974714923,
265
+ "eval_loss": NaN,
266
+ "eval_runtime": 1218.6897,
267
+ "eval_samples_per_second": 2.091,
268
+ "eval_steps_per_second": 0.523,
269
+ "step": 28
270
+ },
271
+ {
272
+ "epoch": 0.014377788795240456,
273
+ "grad_norm": NaN,
274
+ "learning_rate": 5.392295478639225e-05,
275
+ "loss": 0.0,
276
+ "step": 29
277
+ },
278
+ {
279
+ "epoch": 0.014873574615765989,
280
+ "grad_norm": NaN,
281
+ "learning_rate": 5e-05,
282
+ "loss": 0.0,
283
+ "step": 30
284
+ },
285
+ {
286
+ "epoch": 0.015369360436291522,
287
+ "grad_norm": NaN,
288
+ "learning_rate": 4.607704521360776e-05,
289
+ "loss": 0.0,
290
+ "step": 31
291
+ },
292
+ {
293
+ "epoch": 0.015865146256817054,
294
+ "grad_norm": NaN,
295
+ "learning_rate": 4.2178276747988446e-05,
296
+ "loss": 0.0,
297
+ "step": 32
298
+ },
299
+ {
300
+ "epoch": 0.015865146256817054,
301
+ "eval_loss": NaN,
302
+ "eval_runtime": 1218.7929,
303
+ "eval_samples_per_second": 2.091,
304
+ "eval_steps_per_second": 0.523,
305
+ "step": 32
306
+ },
307
+ {
308
+ "epoch": 0.01636093207734259,
309
+ "grad_norm": NaN,
310
+ "learning_rate": 3.832773180720475e-05,
311
+ "loss": 0.0,
312
+ "step": 33
313
+ },
314
+ {
315
+ "epoch": 0.01685671789786812,
316
+ "grad_norm": NaN,
317
+ "learning_rate": 3.4549150281252636e-05,
318
+ "loss": 0.0,
319
+ "step": 34
320
+ },
321
+ {
322
+ "epoch": 0.017352503718393655,
323
+ "grad_norm": NaN,
324
+ "learning_rate": 3.086582838174551e-05,
325
+ "loss": 0.0,
326
+ "step": 35
327
+ },
328
+ {
329
+ "epoch": 0.017848289538919187,
330
+ "grad_norm": NaN,
331
+ "learning_rate": 2.7300475013022663e-05,
332
+ "loss": 0.0,
333
+ "step": 36
334
+ },
335
+ {
336
+ "epoch": 0.017848289538919187,
337
+ "eval_loss": NaN,
338
+ "eval_runtime": 1218.8632,
339
+ "eval_samples_per_second": 2.09,
340
+ "eval_steps_per_second": 0.523,
341
+ "step": 36
342
+ },
343
+ {
344
+ "epoch": 0.01834407535944472,
345
+ "grad_norm": NaN,
346
+ "learning_rate": 2.3875071764202563e-05,
347
+ "loss": 0.0,
348
+ "step": 37
349
+ },
350
+ {
351
+ "epoch": 0.018839861179970253,
352
+ "grad_norm": NaN,
353
+ "learning_rate": 2.061073738537635e-05,
354
+ "loss": 0.0,
355
+ "step": 38
356
+ },
357
+ {
358
+ "epoch": 0.019335647000495785,
359
+ "grad_norm": NaN,
360
+ "learning_rate": 1.7527597583490822e-05,
361
+ "loss": 0.0,
362
+ "step": 39
363
+ },
364
+ {
365
+ "epoch": 0.01983143282102132,
366
+ "grad_norm": NaN,
367
+ "learning_rate": 1.4644660940672627e-05,
368
+ "loss": 0.0,
369
+ "step": 40
370
+ },
371
+ {
372
+ "epoch": 0.01983143282102132,
373
+ "eval_loss": NaN,
374
+ "eval_runtime": 1219.2297,
375
+ "eval_samples_per_second": 2.09,
376
+ "eval_steps_per_second": 0.522,
377
+ "step": 40
378
+ },
379
+ {
380
+ "epoch": 0.02032721864154685,
381
+ "grad_norm": NaN,
382
+ "learning_rate": 1.1979701719998453e-05,
383
+ "loss": 0.0,
384
+ "step": 41
385
+ },
386
+ {
387
+ "epoch": 0.020823004462072386,
388
+ "grad_norm": NaN,
389
+ "learning_rate": 9.549150281252633e-06,
390
+ "loss": 0.0,
391
+ "step": 42
392
+ },
393
+ {
394
+ "epoch": 0.021318790282597918,
395
+ "grad_norm": NaN,
396
+ "learning_rate": 7.367991782295391e-06,
397
+ "loss": 0.0,
398
+ "step": 43
399
+ },
400
+ {
401
+ "epoch": 0.02181457610312345,
402
+ "grad_norm": NaN,
403
+ "learning_rate": 5.449673790581611e-06,
404
+ "loss": 0.0,
405
+ "step": 44
406
+ },
407
+ {
408
+ "epoch": 0.02181457610312345,
409
+ "eval_loss": NaN,
410
+ "eval_runtime": 1218.9478,
411
+ "eval_samples_per_second": 2.09,
412
+ "eval_steps_per_second": 0.523,
413
+ "step": 44
414
+ },
415
+ {
416
+ "epoch": 0.022310361923648984,
417
+ "grad_norm": NaN,
418
+ "learning_rate": 3.8060233744356633e-06,
419
+ "loss": 0.0,
420
+ "step": 45
421
+ },
422
+ {
423
+ "epoch": 0.022806147744174516,
424
+ "grad_norm": NaN,
425
+ "learning_rate": 2.4471741852423237e-06,
426
+ "loss": 0.0,
427
+ "step": 46
428
+ },
429
+ {
430
+ "epoch": 0.02330193356470005,
431
+ "grad_norm": NaN,
432
+ "learning_rate": 1.3815039801161721e-06,
433
+ "loss": 0.0,
434
+ "step": 47
435
+ },
436
+ {
437
+ "epoch": 0.023797719385225583,
438
+ "grad_norm": NaN,
439
+ "learning_rate": 6.15582970243117e-07,
440
+ "loss": 0.0,
441
+ "step": 48
442
+ },
443
+ {
444
+ "epoch": 0.023797719385225583,
445
+ "eval_loss": NaN,
446
+ "eval_runtime": 1219.4358,
447
+ "eval_samples_per_second": 2.089,
448
+ "eval_steps_per_second": 0.522,
449
+ "step": 48
450
+ },
451
+ {
452
+ "epoch": 0.024293505205751114,
453
+ "grad_norm": NaN,
454
+ "learning_rate": 1.5413331334360182e-07,
455
+ "loss": 0.0,
456
+ "step": 49
457
+ },
458
+ {
459
+ "epoch": 0.02478929102627665,
460
+ "grad_norm": NaN,
461
+ "learning_rate": 0.0,
462
+ "loss": 0.0,
463
+ "step": 50
464
  }
465
  ],
466
  "logging_steps": 1,
 
475
  "should_evaluate": false,
476
  "should_log": false,
477
  "should_save": true,
478
+ "should_training_stop": true
479
  },
480
  "attributes": {}
481
  }
482
  },
483
+ "total_flos": 2.238031356493824e+17,
484
  "train_batch_size": 4,
485
  "trial_name": null,
486
  "trial_params": null