dimasik1987 commited on
Commit
afd88cf
1 Parent(s): 9db6e26

Training in progress, step 50, checkpoint

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:973b317c499569fc3a634a5daf57f900be93f95a3b9ea7a2715567fa25ceb740
3
  size 640009682
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cbf5d94cce3b45e1ab7b2c8dbea2246c8742fab79f1a17b5c626d10dd266cb45
3
  size 640009682
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:be5c5d51a63e0cb37ff1ba25afb3da4b225954ece5f1bb97b0727c600738227e
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:80bf3fc1413a7f1d4712f24107188644eeaa65cc0ab1cc9369b6470db8e00a2d
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6676fe28230ae15b45fb334c871c6fdf1a7984a935952b9f8650896c37a8c106
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b1df0528620c07325b8faa7567e59b0c1e86a1f1ee6af1245a69c6c0463fe4e2
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.024366471734892786,
5
  "eval_steps": 4,
6
- "global_step": 25,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -238,6 +238,229 @@
238
  "learning_rate": 6.91341716182545e-05,
239
  "loss": 0.0,
240
  "step": 25
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
241
  }
242
  ],
243
  "logging_steps": 1,
@@ -252,12 +475,12 @@
252
  "should_evaluate": false,
253
  "should_log": false,
254
  "should_save": true,
255
- "should_training_stop": false
256
  },
257
  "attributes": {}
258
  }
259
  },
260
- "total_flos": 9.86091809144832e+16,
261
  "train_batch_size": 4,
262
  "trial_name": null,
263
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.04873294346978557,
5
  "eval_steps": 4,
6
+ "global_step": 50,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
238
  "learning_rate": 6.91341716182545e-05,
239
  "loss": 0.0,
240
  "step": 25
241
+ },
242
+ {
243
+ "epoch": 0.025341130604288498,
244
+ "grad_norm": NaN,
245
+ "learning_rate": 6.545084971874738e-05,
246
+ "loss": 0.0,
247
+ "step": 26
248
+ },
249
+ {
250
+ "epoch": 0.02631578947368421,
251
+ "grad_norm": NaN,
252
+ "learning_rate": 6.167226819279528e-05,
253
+ "loss": 0.0,
254
+ "step": 27
255
+ },
256
+ {
257
+ "epoch": 0.02729044834307992,
258
+ "grad_norm": NaN,
259
+ "learning_rate": 5.782172325201155e-05,
260
+ "loss": 0.0,
261
+ "step": 28
262
+ },
263
+ {
264
+ "epoch": 0.02729044834307992,
265
+ "eval_loss": NaN,
266
+ "eval_runtime": 573.2103,
267
+ "eval_samples_per_second": 2.263,
268
+ "eval_steps_per_second": 0.567,
269
+ "step": 28
270
+ },
271
+ {
272
+ "epoch": 0.028265107212475632,
273
+ "grad_norm": NaN,
274
+ "learning_rate": 5.392295478639225e-05,
275
+ "loss": 0.0,
276
+ "step": 29
277
+ },
278
+ {
279
+ "epoch": 0.029239766081871343,
280
+ "grad_norm": NaN,
281
+ "learning_rate": 5e-05,
282
+ "loss": 0.0,
283
+ "step": 30
284
+ },
285
+ {
286
+ "epoch": 0.030214424951267055,
287
+ "grad_norm": NaN,
288
+ "learning_rate": 4.607704521360776e-05,
289
+ "loss": 0.0,
290
+ "step": 31
291
+ },
292
+ {
293
+ "epoch": 0.031189083820662766,
294
+ "grad_norm": NaN,
295
+ "learning_rate": 4.2178276747988446e-05,
296
+ "loss": 0.0,
297
+ "step": 32
298
+ },
299
+ {
300
+ "epoch": 0.031189083820662766,
301
+ "eval_loss": NaN,
302
+ "eval_runtime": 572.9542,
303
+ "eval_samples_per_second": 2.264,
304
+ "eval_steps_per_second": 0.567,
305
+ "step": 32
306
+ },
307
+ {
308
+ "epoch": 0.03216374269005848,
309
+ "grad_norm": NaN,
310
+ "learning_rate": 3.832773180720475e-05,
311
+ "loss": 0.0,
312
+ "step": 33
313
+ },
314
+ {
315
+ "epoch": 0.03313840155945419,
316
+ "grad_norm": NaN,
317
+ "learning_rate": 3.4549150281252636e-05,
318
+ "loss": 0.0,
319
+ "step": 34
320
+ },
321
+ {
322
+ "epoch": 0.0341130604288499,
323
+ "grad_norm": NaN,
324
+ "learning_rate": 3.086582838174551e-05,
325
+ "loss": 0.0,
326
+ "step": 35
327
+ },
328
+ {
329
+ "epoch": 0.03508771929824561,
330
+ "grad_norm": NaN,
331
+ "learning_rate": 2.7300475013022663e-05,
332
+ "loss": 0.0,
333
+ "step": 36
334
+ },
335
+ {
336
+ "epoch": 0.03508771929824561,
337
+ "eval_loss": NaN,
338
+ "eval_runtime": 572.9072,
339
+ "eval_samples_per_second": 2.264,
340
+ "eval_steps_per_second": 0.567,
341
+ "step": 36
342
+ },
343
+ {
344
+ "epoch": 0.036062378167641324,
345
+ "grad_norm": NaN,
346
+ "learning_rate": 2.3875071764202563e-05,
347
+ "loss": 0.0,
348
+ "step": 37
349
+ },
350
+ {
351
+ "epoch": 0.037037037037037035,
352
+ "grad_norm": NaN,
353
+ "learning_rate": 2.061073738537635e-05,
354
+ "loss": 0.0,
355
+ "step": 38
356
+ },
357
+ {
358
+ "epoch": 0.038011695906432746,
359
+ "grad_norm": NaN,
360
+ "learning_rate": 1.7527597583490822e-05,
361
+ "loss": 0.0,
362
+ "step": 39
363
+ },
364
+ {
365
+ "epoch": 0.03898635477582846,
366
+ "grad_norm": NaN,
367
+ "learning_rate": 1.4644660940672627e-05,
368
+ "loss": 0.0,
369
+ "step": 40
370
+ },
371
+ {
372
+ "epoch": 0.03898635477582846,
373
+ "eval_loss": NaN,
374
+ "eval_runtime": 572.7858,
375
+ "eval_samples_per_second": 2.264,
376
+ "eval_steps_per_second": 0.567,
377
+ "step": 40
378
+ },
379
+ {
380
+ "epoch": 0.03996101364522417,
381
+ "grad_norm": NaN,
382
+ "learning_rate": 1.1979701719998453e-05,
383
+ "loss": 0.0,
384
+ "step": 41
385
+ },
386
+ {
387
+ "epoch": 0.04093567251461988,
388
+ "grad_norm": NaN,
389
+ "learning_rate": 9.549150281252633e-06,
390
+ "loss": 0.0,
391
+ "step": 42
392
+ },
393
+ {
394
+ "epoch": 0.04191033138401559,
395
+ "grad_norm": NaN,
396
+ "learning_rate": 7.367991782295391e-06,
397
+ "loss": 0.0,
398
+ "step": 43
399
+ },
400
+ {
401
+ "epoch": 0.042884990253411304,
402
+ "grad_norm": NaN,
403
+ "learning_rate": 5.449673790581611e-06,
404
+ "loss": 0.0,
405
+ "step": 44
406
+ },
407
+ {
408
+ "epoch": 0.042884990253411304,
409
+ "eval_loss": NaN,
410
+ "eval_runtime": 573.1049,
411
+ "eval_samples_per_second": 2.263,
412
+ "eval_steps_per_second": 0.567,
413
+ "step": 44
414
+ },
415
+ {
416
+ "epoch": 0.043859649122807015,
417
+ "grad_norm": NaN,
418
+ "learning_rate": 3.8060233744356633e-06,
419
+ "loss": 0.0,
420
+ "step": 45
421
+ },
422
+ {
423
+ "epoch": 0.04483430799220273,
424
+ "grad_norm": NaN,
425
+ "learning_rate": 2.4471741852423237e-06,
426
+ "loss": 0.0,
427
+ "step": 46
428
+ },
429
+ {
430
+ "epoch": 0.04580896686159844,
431
+ "grad_norm": NaN,
432
+ "learning_rate": 1.3815039801161721e-06,
433
+ "loss": 0.0,
434
+ "step": 47
435
+ },
436
+ {
437
+ "epoch": 0.04678362573099415,
438
+ "grad_norm": NaN,
439
+ "learning_rate": 6.15582970243117e-07,
440
+ "loss": 0.0,
441
+ "step": 48
442
+ },
443
+ {
444
+ "epoch": 0.04678362573099415,
445
+ "eval_loss": NaN,
446
+ "eval_runtime": 572.8438,
447
+ "eval_samples_per_second": 2.264,
448
+ "eval_steps_per_second": 0.567,
449
+ "step": 48
450
+ },
451
+ {
452
+ "epoch": 0.04775828460038986,
453
+ "grad_norm": NaN,
454
+ "learning_rate": 1.5413331334360182e-07,
455
+ "loss": 0.0,
456
+ "step": 49
457
+ },
458
+ {
459
+ "epoch": 0.04873294346978557,
460
+ "grad_norm": NaN,
461
+ "learning_rate": 0.0,
462
+ "loss": 0.0,
463
+ "step": 50
464
  }
465
  ],
466
  "logging_steps": 1,
 
475
  "should_evaluate": false,
476
  "should_log": false,
477
  "should_save": true,
478
+ "should_training_stop": true
479
  },
480
  "attributes": {}
481
  }
482
  },
483
+ "total_flos": 1.972183618289664e+17,
484
  "train_batch_size": 4,
485
  "trial_name": null,
486
  "trial_params": null