leixa committed
Commit 028c36c · 1 Parent(s): a0396b5

Training in progress, step 279, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:781893ab268a460d6c6281d7328238df1862c326fc57bc1a1cce25d4e4fca254
+ oid sha256:c0cf25f76e8662756856fd1a097be6ff3870d9f310686bf41f15fe00f6e14dc4
  size 639691872
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:f6927a83945068a1faaa14b39736f3798ea663c353aa04ca6e7da7e5afc917be
- size 325339796
+ oid sha256:742095389fc358571b77e198008ebd180a04e2384f21dd14cc648803058e1cfe
+ size 325340244
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:5f5d3012cf842ae96026cfb7617d36b883980d38f99422e689133b1c4367194e
+ oid sha256:28e1effb06033458f08c521267ddbc73b4a5a3e148e528b2cfd2ce1d0d17a805
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:dd4195f21f3ab721f41e157ad5801251bc3ef7a33dc24988ba85a5ba8d1d1f0e
+ oid sha256:de2e7670b3561000eee216684d0727bea9800d1c3f3b2422105732155595c43d
  size 1064
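
The four binary checkpoint files above are tracked with Git LFS, so only their pointer files (version, oid sha256, size) change in this commit. Below is a minimal sketch, in Python and with hypothetical local paths, of how one might confirm that a downloaded blob matches the oid and size recorded in its pointer:

import hashlib
from pathlib import Path


def parse_lfs_pointer(pointer_path: str) -> dict:
    """Parse a Git LFS pointer file ("key value" lines) into a dict."""
    fields = {}
    for line in Path(pointer_path).read_text().splitlines():
        key, _, value = line.partition(" ")
        fields[key] = value
    return fields


def blob_matches_pointer(pointer_path: str, blob_path: str) -> bool:
    """Check a downloaded blob against the oid and size recorded in its pointer."""
    fields = parse_lfs_pointer(pointer_path)
    expected_oid = fields["oid"].removeprefix("sha256:")
    expected_size = int(fields["size"])
    data = Path(blob_path).read_bytes()
    return hashlib.sha256(data).hexdigest() == expected_oid and len(data) == expected_size


# Hypothetical paths: the pointer as checked out without LFS smudging,
# and the real blob fetched separately (e.g. via `git lfs pull`).
print(blob_matches_pointer("adapter_model.safetensors.pointer",
                           "last-checkpoint/adapter_model.safetensors"))

Note that optimizer.pt is the only blob whose size changes here (325339796 to 325340244 bytes); the other files keep their byte counts and only their hashes change.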
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
  {
    "best_metric": null,
    "best_model_checkpoint": null,
-   "epoch": 2.0121703853955375,
+   "epoch": 2.26369168356998,
    "eval_steps": 31,
-   "global_step": 248,
+   "global_step": 279,
    "is_hyper_param_search": false,
    "is_local_process_zero": true,
    "is_world_process_zero": true,
@@ -653,6 +653,91 @@
        "eval_samples_per_second": 15.489,
        "eval_steps_per_second": 1.936,
        "step": 248
+     },
+     {
+       "epoch": 2.020283975659229,
+       "grad_norm": 3.395972967147827,
+       "learning_rate": 2.537882199482665e-05,
+       "loss": 1.1378,
+       "step": 249
+     },
+     {
+       "epoch": 2.0446247464503045,
+       "grad_norm": 4.442982196807861,
+       "learning_rate": 2.4248096254497288e-05,
+       "loss": 1.253,
+       "step": 252
+     },
+     {
+       "epoch": 2.0689655172413794,
+       "grad_norm": 5.2550764083862305,
+       "learning_rate": 2.3135019582658802e-05,
+       "loss": 1.0433,
+       "step": 255
+     },
+     {
+       "epoch": 2.0933062880324544,
+       "grad_norm": 4.615274429321289,
+       "learning_rate": 2.2040354826462668e-05,
+       "loss": 1.1078,
+       "step": 258
+     },
+     {
+       "epoch": 2.1176470588235294,
+       "grad_norm": 5.723622798919678,
+       "learning_rate": 2.0964852214453013e-05,
+       "loss": 0.9585,
+       "step": 261
+     },
+     {
+       "epoch": 2.1419878296146044,
+       "grad_norm": 4.3719587326049805,
+       "learning_rate": 1.9909248842397584e-05,
+       "loss": 0.9587,
+       "step": 264
+     },
+     {
+       "epoch": 2.1663286004056794,
+       "grad_norm": 6.265243053436279,
+       "learning_rate": 1.887426816811903e-05,
+       "loss": 0.9681,
+       "step": 267
+     },
+     {
+       "epoch": 2.1906693711967544,
+       "grad_norm": 5.796363830566406,
+       "learning_rate": 1.7860619515673033e-05,
+       "loss": 1.0059,
+       "step": 270
+     },
+     {
+       "epoch": 2.2150101419878294,
+       "grad_norm": 5.817225456237793,
+       "learning_rate": 1.6868997589213136e-05,
+       "loss": 1.0253,
+       "step": 273
+     },
+     {
+       "epoch": 2.239350912778905,
+       "grad_norm": 4.450856685638428,
+       "learning_rate": 1.5900081996875083e-05,
+       "loss": 0.7533,
+       "step": 276
+     },
+     {
+       "epoch": 2.26369168356998,
+       "grad_norm": 7.340899467468262,
+       "learning_rate": 1.4954536785007456e-05,
+       "loss": 0.9246,
+       "step": 279
+     },
+     {
+       "epoch": 2.26369168356998,
+       "eval_loss": 0.5402039885520935,
+       "eval_runtime": 13.4355,
+       "eval_samples_per_second": 15.481,
+       "eval_steps_per_second": 1.935,
+       "step": 279
      }
    ],
    "logging_steps": 3,
@@ -672,7 +757,7 @@
        "attributes": {}
      }
    },
-   "total_flos": 3.293808601155502e+17,
+   "total_flos": 3.706262291176489e+17,
    "train_batch_size": 8,
    "trial_name": null,
    "trial_params": null