leixa committed on
Commit
06d870c
1 Parent(s): 9b5dd15

Training in progress, step 99, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:db1dc30bd262bcdcc9b11ac6858c3fdc0bf50b77419a1ab6ded40fc5b8448cbc
3
  size 50503544
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5cf05509147ae289a6401c175787541a86e7b27b9e02822d13d3c68a9b551ee8
3
  size 50503544
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b1cac41c925e8ebeb2ac3e01fe2cfcfab106ea8e627aa3b3d65d32b44f8b7412
3
  size 25986148
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bda070ce8697c300bbcd60d0635b8325887f21fdcc6715ad16e17bcfe2c59e4a
3
  size 25986148
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7d40f761f58490413a6bc1eba33545e30eb8837c7707dca5575393f3f81beb33
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0104cd3a39a39e7efe96d4368d017e590c56f84656f77b6c2e0cc8bfebf6aafb
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cb97b24f34ff3e53eec5be9cf35c1a7161c58dbc2fed7dda160fb3eb64e5f353
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dd09e47b42474af18ba1772930aac1db12bb0a5a5868e68e151935c59da27a51
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.037578288100208766,
5
  "eval_steps": 17,
6
- "global_step": 90,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -685,6 +685,69 @@
685
  "learning_rate": 6.030737921409169e-06,
686
  "loss": 2.6747,
687
  "step": 90
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
688
  }
689
  ],
690
  "logging_steps": 1,
@@ -704,7 +767,7 @@
704
  "attributes": {}
705
  }
706
  },
707
- "total_flos": 3.705727579324416e+16,
708
  "train_batch_size": 4,
709
  "trial_name": null,
710
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.04133611691022965,
5
  "eval_steps": 17,
6
+ "global_step": 99,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
685
  "learning_rate": 6.030737921409169e-06,
686
  "loss": 2.6747,
687
  "step": 90
688
+ },
689
+ {
690
+ "epoch": 0.037995824634655534,
691
+ "grad_norm": Infinity,
692
+ "learning_rate": 4.8943483704846475e-06,
693
+ "loss": 2.9142,
694
+ "step": 91
695
+ },
696
+ {
697
+ "epoch": 0.038413361169102295,
698
+ "grad_norm": Infinity,
699
+ "learning_rate": 3.873830406168111e-06,
700
+ "loss": 2.9999,
701
+ "step": 92
702
+ },
703
+ {
704
+ "epoch": 0.03883089770354906,
705
+ "grad_norm": Infinity,
706
+ "learning_rate": 2.970427372400353e-06,
707
+ "loss": 2.9339,
708
+ "step": 93
709
+ },
710
+ {
711
+ "epoch": 0.03924843423799582,
712
+ "grad_norm": Infinity,
713
+ "learning_rate": 2.1852399266194314e-06,
714
+ "loss": 3.0186,
715
+ "step": 94
716
+ },
717
+ {
718
+ "epoch": 0.03966597077244259,
719
+ "grad_norm": Infinity,
720
+ "learning_rate": 1.5192246987791981e-06,
721
+ "loss": 2.7521,
722
+ "step": 95
723
+ },
724
+ {
725
+ "epoch": 0.04008350730688935,
726
+ "grad_norm": Infinity,
727
+ "learning_rate": 9.731931258429638e-07,
728
+ "loss": 2.8372,
729
+ "step": 96
730
+ },
731
+ {
732
+ "epoch": 0.04050104384133612,
733
+ "grad_norm": Infinity,
734
+ "learning_rate": 5.478104631726711e-07,
735
+ "loss": 2.8114,
736
+ "step": 97
737
+ },
738
+ {
739
+ "epoch": 0.04091858037578288,
740
+ "grad_norm": Infinity,
741
+ "learning_rate": 2.4359497401758024e-07,
742
+ "loss": 2.7142,
743
+ "step": 98
744
+ },
745
+ {
746
+ "epoch": 0.04133611691022965,
747
+ "grad_norm": Infinity,
748
+ "learning_rate": 6.09172980904238e-08,
749
+ "loss": 3.1884,
750
+ "step": 99
751
  }
752
  ],
753
  "logging_steps": 1,
 
767
  "attributes": {}
768
  }
769
  },
770
+ "total_flos": 4.076300337256858e+16,
771
  "train_batch_size": 4,
772
  "trial_name": null,
773
  "trial_params": null