Nexspear committed
Commit 9d31841 · verified · 1 Parent(s): cbcb8ce

Training in progress, step 20, checkpoint
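This commit refreshes the Trainer checkpoint tracked under last-checkpoint/: the optimizer, LR scheduler and RNG state blobs plus trainer_state.json. A minimal sketch of inspecting those artifacts locally, assuming the repository has been cloned and the LFS blobs pulled; the paths are the ones changed below, everything else is illustrative:

# Minimal sketch: peek at the checkpoint artifacts changed in this commit.
# Assumes the repo is cloned locally and `git lfs pull` has fetched the blobs.
import json
import torch

optim_state = torch.load("last-checkpoint/optimizer.pt", map_location="cpu")
print(list(optim_state.keys()))      # a PyTorch optimizer state dict: 'state', 'param_groups'

with open("last-checkpoint/trainer_state.json") as f:
    trainer_state = json.load(f)
print(trainer_state["global_step"])  # 20, matching the commit message
print(trainer_state["epoch"])        # 1.6326530612244898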

last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7a87a718681ae26de2607be4cda2d04b6af6b27d591be2c68820c9f934a7c2b0
+oid sha256:5685d41458363868dd6062a44e770be5a7cd162528a7419361b5f9c4b8135aff
 size 341314196
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9138bd6a415b1283b8cae260b2d5aa498f2918d4d3ce14de33ccba4ab5193f94
+oid sha256:4b04185bc9c4a212cb29759e873b7839340867473b9f3b1d14465d77a47522b1
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2f396245bdb8d5e1c75a5bff1988c6875ecb018cc348f9089532a15533946e6c
+oid sha256:1b2b9ceeb1c62606063fec42abb6728182239a71c3611c8bd65d3e5863308d7b
 size 1064
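Each pointer above carries the blob's SHA-256 (oid sha256:) and byte size, so a fetched file can be checked against the metadata in the diff. A minimal sketch for the new optimizer.pt, using the oid and size shown above and assuming a local clone with the blob fetched:

# Minimal sketch: verify a fetched Git LFS blob against its pointer file.
# oid and size are the "+" values from the optimizer.pt diff above.
import hashlib
import os

path = "last-checkpoint/optimizer.pt"
expected_oid = "5685d41458363868dd6062a44e770be5a7cd162528a7419361b5f9c4b8135aff"
expected_size = 341314196

digest = hashlib.sha256()
with open(path, "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):
        digest.update(chunk)

assert os.path.getsize(path) == expected_size, "size mismatch"
assert digest.hexdigest() == expected_oid, "sha256 mismatch"
print("blob matches its LFS pointer")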
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 1.306122448979592,
+  "epoch": 1.6326530612244898,
   "eval_steps": 4,
-  "global_step": 16,
+  "global_step": 20,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -82,6 +82,21 @@
       "eval_samples_per_second": 13.746,
       "eval_steps_per_second": 1.964,
       "step": 16
+    },
+    {
+      "epoch": 1.469387755102041,
+      "grad_norm": NaN,
+      "learning_rate": 7.985792958513931e-05,
+      "loss": 0.0,
+      "step": 18
+    },
+    {
+      "epoch": 1.6326530612244898,
+      "eval_loss": NaN,
+      "eval_runtime": 1.5287,
+      "eval_samples_per_second": 13.738,
+      "eval_steps_per_second": 1.963,
+      "step": 20
     }
   ],
   "logging_steps": 3,
@@ -101,7 +116,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 2.380622991458304e+16,
+  "total_flos": 2.9840284229566464e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null