sekarmulyani committed on
Commit 539447a · 1 Parent(s): 8d53a12

Upload 8 files

Files changed (5)
  1. optimizer.pt +1 -1
  2. pytorch_model.bin +1 -1
  3. rng_state.pth +1 -1
  4. scheduler.pt +1 -1
  5. trainer_state.json +295 -3
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:ff010bc66fd61f1d34710e22fc453cbd4986ef52cbca0f808e71d2287359c01a
+ oid sha256:0665fe7d442f8bbba1ae059bff6270660242a07678e0703e7514cb19706073e0
  size 995641861
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:86bf6ab09b64321efc94f3bc379e531d0a1338c8c5fa3b38c7c52464847c79d8
+ oid sha256:b013aed3f4a82aeebcba3e970e376727406e6a2ed7078b6b88a510efdcc8f6e2
  size 497807197
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:6dd3a816ab8628e6038ecf426e93a907752049203fbc39b63fcde557182a866f
+ oid sha256:2f6cf74835c1af9f9e3dc4bcfbc0eae1e84048401ffb87d26ff318411e17c02d
  size 14575
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:41b18054e524341e87a895cc798ffc44bc6c3d095dc41640d72b87475609e792
+ oid sha256:381b4f1af09e750b9ce29da1e140136f186310be59fcc0dc325e00c9f5f3a3d0
  size 627
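
All four files above are Git LFS pointers: each stores only a spec version line, a sha256 oid, and the blob size, so this commit swaps the checkpoint blobs (new oids) while the byte sizes stay identical. As a minimal sketch of how a downloaded blob could be checked against a pointer's oid and size (the local file path is an assumption, not part of the commit), in Python:

import hashlib
import os

def verify_lfs_pointer(blob_path, expected_oid, expected_size):
    # Compare the on-disk size first, then the SHA-256 digest of the blob.
    if os.path.getsize(blob_path) != expected_size:
        return False
    digest = hashlib.sha256()
    with open(blob_path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):  # 1 MiB chunks
            digest.update(chunk)
    return digest.hexdigest() == expected_oid

# Values taken from the new optimizer.pt pointer in this commit;
# "optimizer.pt" is a hypothetical local download path.
print(verify_lfs_pointer(
    "optimizer.pt",
    "0665fe7d442f8bbba1ae059bff6270660242a07678e0703e7514cb19706073e0",
    995641861,
))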
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
  {
  "best_metric": null,
  "best_model_checkpoint": null,
- "epoch": 3.0,
+ "epoch": 5.0,
  "eval_steps": 500,
- "global_step": 34431,
+ "global_step": 57385,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -439,13 +439,305 @@
  "eval_samples_per_second": 41.687,
  "eval_steps_per_second": 5.211,
  "step": 34431
+ },
+ {
+ "epoch": 3.01,
+ "learning_rate": 6.24248496993988e-06,
+ "loss": 1.5506,
+ "step": 34500
+ },
+ {
+ "epoch": 3.05,
+ "learning_rate": 6.188028230373791e-06,
+ "loss": 1.559,
+ "step": 35000
+ },
+ {
+ "epoch": 3.09,
+ "learning_rate": 6.133571490807702e-06,
+ "loss": 1.5388,
+ "step": 35500
+ },
+ {
+ "epoch": 3.14,
+ "learning_rate": 6.079114751241613e-06,
+ "loss": 1.5467,
+ "step": 36000
+ },
+ {
+ "epoch": 3.18,
+ "learning_rate": 6.024658011675526e-06,
+ "loss": 1.5391,
+ "step": 36500
+ },
+ {
+ "epoch": 3.22,
+ "learning_rate": 5.970201272109437e-06,
+ "loss": 1.5364,
+ "step": 37000
+ },
+ {
+ "epoch": 3.27,
+ "learning_rate": 5.915744532543348e-06,
+ "loss": 1.5376,
+ "step": 37500
+ },
+ {
+ "epoch": 3.31,
+ "learning_rate": 5.861287792977259e-06,
+ "loss": 1.5397,
+ "step": 38000
+ },
+ {
+ "epoch": 3.35,
+ "learning_rate": 5.806831053411171e-06,
+ "loss": 1.5336,
+ "step": 38500
+ },
+ {
+ "epoch": 3.4,
+ "learning_rate": 5.752374313845082e-06,
+ "loss": 1.5378,
+ "step": 39000
+ },
+ {
+ "epoch": 3.44,
+ "learning_rate": 5.697917574278993e-06,
+ "loss": 1.5318,
+ "step": 39500
+ },
+ {
+ "epoch": 3.49,
+ "learning_rate": 5.643460834712905e-06,
+ "loss": 1.5252,
+ "step": 40000
+ },
+ {
+ "epoch": 3.53,
+ "learning_rate": 5.589004095146816e-06,
+ "loss": 1.5333,
+ "step": 40500
+ },
+ {
+ "epoch": 3.57,
+ "learning_rate": 5.5345473555807275e-06,
+ "loss": 1.5299,
+ "step": 41000
+ },
+ {
+ "epoch": 3.62,
+ "learning_rate": 5.4800906160146385e-06,
+ "loss": 1.5215,
+ "step": 41500
+ },
+ {
+ "epoch": 3.66,
+ "learning_rate": 5.4256338764485495e-06,
+ "loss": 1.52,
+ "step": 42000
+ },
+ {
+ "epoch": 3.7,
+ "learning_rate": 5.3711771368824605e-06,
+ "loss": 1.5258,
+ "step": 42500
+ },
+ {
+ "epoch": 3.75,
+ "learning_rate": 5.316720397316373e-06,
+ "loss": 1.5256,
+ "step": 43000
+ },
+ {
+ "epoch": 3.79,
+ "learning_rate": 5.262263657750284e-06,
+ "loss": 1.5205,
+ "step": 43500
+ },
+ {
+ "epoch": 3.83,
+ "learning_rate": 5.207806918184195e-06,
+ "loss": 1.5236,
+ "step": 44000
+ },
+ {
+ "epoch": 3.88,
+ "learning_rate": 5.153350178618106e-06,
+ "loss": 1.5281,
+ "step": 44500
+ },
+ {
+ "epoch": 3.92,
+ "learning_rate": 5.098893439052017e-06,
+ "loss": 1.5175,
+ "step": 45000
+ },
+ {
+ "epoch": 3.96,
+ "learning_rate": 5.044436699485928e-06,
+ "loss": 1.5215,
+ "step": 45500
+ },
+ {
+ "epoch": 4.0,
+ "eval_loss": 1.5107132196426392,
+ "eval_runtime": 3.2672,
+ "eval_samples_per_second": 41.625,
+ "eval_steps_per_second": 5.203,
+ "step": 45908
+ },
+ {
+ "epoch": 4.01,
+ "learning_rate": 4.98997995991984e-06,
+ "loss": 1.5202,
+ "step": 46000
+ },
+ {
+ "epoch": 4.05,
+ "learning_rate": 4.935523220353751e-06,
+ "loss": 1.5136,
+ "step": 46500
+ },
+ {
+ "epoch": 4.1,
+ "learning_rate": 4.881066480787663e-06,
+ "loss": 1.5119,
+ "step": 47000
+ },
+ {
+ "epoch": 4.14,
+ "learning_rate": 4.826609741221574e-06,
+ "loss": 1.5052,
+ "step": 47500
+ },
+ {
+ "epoch": 4.18,
+ "learning_rate": 4.772153001655485e-06,
+ "loss": 1.5088,
+ "step": 48000
+ },
+ {
+ "epoch": 4.23,
+ "learning_rate": 4.717696262089397e-06,
+ "loss": 1.5078,
+ "step": 48500
+ },
+ {
+ "epoch": 4.27,
+ "learning_rate": 4.663239522523308e-06,
+ "loss": 1.5099,
+ "step": 49000
+ },
+ {
+ "epoch": 4.31,
+ "learning_rate": 4.608782782957219e-06,
+ "loss": 1.5098,
+ "step": 49500
+ },
+ {
+ "epoch": 4.36,
+ "learning_rate": 4.5543260433911305e-06,
+ "loss": 1.5044,
+ "step": 50000
+ },
+ {
+ "epoch": 4.4,
+ "learning_rate": 4.4998693038250415e-06,
+ "loss": 1.5049,
+ "step": 50500
+ },
+ {
+ "epoch": 4.44,
+ "learning_rate": 4.445412564258953e-06,
+ "loss": 1.4958,
+ "step": 51000
+ },
+ {
+ "epoch": 4.49,
+ "learning_rate": 4.390955824692864e-06,
+ "loss": 1.5073,
+ "step": 51500
+ },
+ {
+ "epoch": 4.53,
+ "learning_rate": 4.336499085126776e-06,
+ "loss": 1.5015,
+ "step": 52000
+ },
+ {
+ "epoch": 4.57,
+ "learning_rate": 4.282042345560687e-06,
+ "loss": 1.5022,
+ "step": 52500
+ },
+ {
+ "epoch": 4.62,
+ "learning_rate": 4.227585605994598e-06,
+ "loss": 1.4973,
+ "step": 53000
+ },
+ {
+ "epoch": 4.66,
+ "learning_rate": 4.17312886642851e-06,
+ "loss": 1.4944,
+ "step": 53500
+ },
+ {
+ "epoch": 4.71,
+ "learning_rate": 4.118672126862421e-06,
+ "loss": 1.5024,
+ "step": 54000
+ },
+ {
+ "epoch": 4.75,
+ "learning_rate": 4.064215387296332e-06,
+ "loss": 1.4981,
+ "step": 54500
+ },
+ {
+ "epoch": 4.79,
+ "learning_rate": 4.009758647730244e-06,
+ "loss": 1.4942,
+ "step": 55000
+ },
+ {
+ "epoch": 4.84,
+ "learning_rate": 3.955301908164155e-06,
+ "loss": 1.4964,
+ "step": 55500
+ },
+ {
+ "epoch": 4.88,
+ "learning_rate": 3.900845168598066e-06,
+ "loss": 1.4995,
+ "step": 56000
+ },
+ {
+ "epoch": 4.92,
+ "learning_rate": 3.846388429031978e-06,
+ "loss": 1.4959,
+ "step": 56500
+ },
+ {
+ "epoch": 4.97,
+ "learning_rate": 3.7919316894658886e-06,
+ "loss": 1.4922,
+ "step": 57000
+ },
+ {
+ "epoch": 5.0,
+ "eval_loss": 1.4904537200927734,
+ "eval_runtime": 3.2628,
+ "eval_samples_per_second": 41.682,
+ "eval_steps_per_second": 5.21,
+ "step": 57385
  }
  ],
  "logging_steps": 500,
  "max_steps": 91816,
  "num_train_epochs": 8,
  "save_steps": 500,
- "total_flos": 5.397574828032e+16,
+ "total_flos": 8.99595804672e+16,
  "trial_name": null,
  "trial_params": null
  }
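
The trainer_state.json changes record this checkpoint advancing from epoch 3.0 (global_step 34431) to epoch 5.0 (global_step 57385). At 11477 optimizer steps per epoch that is consistent with max_steps 91816 for the planned 8 epochs (8 × 11477 = 91816), and the appended log entries show eval_loss improving from 1.5107 at epoch 4 to 1.4905 at epoch 5. As a minimal sketch of inspecting this state file (the local path is an assumption, not part of the commit), in Python:

import json

# Hypothetical local copy of the trainer_state.json shown in this diff.
with open("trainer_state.json") as f:
    state = json.load(f)

print("epoch:", state["epoch"], "global_step:", state["global_step"])

# log_history holds both training-loss entries and evaluation entries.
train_logs = [e for e in state["log_history"] if "loss" in e]
eval_logs = [e for e in state["log_history"] if "eval_loss" in e]

print("last logged training loss:", train_logs[-1]["loss"], "at step", train_logs[-1]["step"])
for e in eval_logs:
    print("epoch", e["epoch"], "eval_loss", e["eval_loss"], "at step", e["step"])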