leixa committed on
Commit
b197db5
1 Parent(s): 06d870c

Training in progress, step 100, checkpoint

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bda070ce8697c300bbcd60d0635b8325887f21fdcc6715ad16e17bcfe2c59e4a
3
  size 25986148
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8f5821ef7ab41bd6f9c2685b0841c6a9fbd8d5787f6afbc7ad1da85983167fbe
3
  size 25986148
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0104cd3a39a39e7efe96d4368d017e590c56f84656f77b6c2e0cc8bfebf6aafb
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e2ee64302c2e6d99452e0846f387608fbc785131d28043c874888db195b7f800
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dd09e47b42474af18ba1772930aac1db12bb0a5a5868e68e151935c59da27a51
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3a60c7d771c1fd156acee762fba03c724cb41829a3f71df370ecd1d20b134982
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.04133611691022965,
5
  "eval_steps": 17,
6
- "global_step": 99,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -748,6 +748,13 @@
748
  "learning_rate": 6.09172980904238e-08,
749
  "loss": 3.1884,
750
  "step": 99
 
 
 
 
 
 
 
751
  }
752
  ],
753
  "logging_steps": 1,
@@ -762,12 +769,12 @@
762
  "should_evaluate": false,
763
  "should_log": false,
764
  "should_save": true,
765
- "should_training_stop": false
766
  },
767
  "attributes": {}
768
  }
769
  },
770
- "total_flos": 4.076300337256858e+16,
771
  "train_batch_size": 4,
772
  "trial_name": null,
773
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.04175365344467641,
5
  "eval_steps": 17,
6
+ "global_step": 100,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
748
  "learning_rate": 6.09172980904238e-08,
749
  "loss": 3.1884,
750
  "step": 99
751
+ },
752
+ {
753
+ "epoch": 0.04175365344467641,
754
+ "grad_norm": Infinity,
755
+ "learning_rate": 0.0,
756
+ "loss": 3.0133,
757
+ "step": 100
758
  }
759
  ],
760
  "logging_steps": 1,
 
769
  "should_evaluate": false,
770
  "should_log": false,
771
  "should_save": true,
772
+ "should_training_stop": true
773
  },
774
  "attributes": {}
775
  }
776
  },
777
+ "total_flos": 4.11747508813824e+16,
778
  "train_batch_size": 4,
779
  "trial_name": null,
780
  "trial_params": null