dzanbek commited on
Commit
45d82bb
1 Parent(s): 904e1d6

Training in progress, step 16, checkpoint

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9aee943c88866576b3660584993976cad91a8ccfbe94620f937bc0740728f3a8
3
  size 335922386
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:30068b0b0922b30210f2b35c151ff3844cd2b8f68bc350a3a1bd8e06deaa7e10
3
  size 335922386
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1d0b607ea403567cbca8c8fef3b4eb224d8a3303f58fdb16fa63b99df273bc5f
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a80ea0d2fcb5de2fb11891e897936e88df521e68f228c6d529d75261583df57e
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c21e50d3a1bfdf9ca01a5258fbc54dba397e86dfc8fedbdf3aa91d3709f67147
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ea7d2408b3a422851243030b42dda993dfe2492d0a051609097eef94f3c74fce
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.00043831842267984754,
5
  "eval_steps": 2,
6
- "global_step": 14,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -169,6 +169,28 @@
169
  "eval_samples_per_second": 4.187,
170
  "eval_steps_per_second": 4.187,
171
  "step": 14
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
172
  }
173
  ],
174
  "logging_steps": 1,
@@ -188,7 +210,7 @@
188
  "attributes": {}
189
  }
190
  },
191
- "total_flos": 9843812981538816.0,
192
  "train_batch_size": 1,
193
  "trial_name": null,
194
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.00050093534020554,
5
  "eval_steps": 2,
6
+ "global_step": 16,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
169
  "eval_samples_per_second": 4.187,
170
  "eval_steps_per_second": 4.187,
171
  "step": 14
172
+ },
173
+ {
174
+ "epoch": 0.0004696268814426938,
175
+ "grad_norm": NaN,
176
+ "learning_rate": 0.00015000000000000001,
177
+ "loss": 0.0,
178
+ "step": 15
179
+ },
180
+ {
181
+ "epoch": 0.00050093534020554,
182
+ "grad_norm": NaN,
183
+ "learning_rate": 0.00013090169943749476,
184
+ "loss": 0.0,
185
+ "step": 16
186
+ },
187
+ {
188
+ "epoch": 0.00050093534020554,
189
+ "eval_loss": NaN,
190
+ "eval_runtime": 3323.0534,
191
+ "eval_samples_per_second": 4.047,
192
+ "eval_steps_per_second": 4.047,
193
+ "step": 16
194
  }
195
  ],
196
  "logging_steps": 1,
 
210
  "attributes": {}
211
  }
212
  },
213
+ "total_flos": 1.1250071978901504e+16,
214
  "train_batch_size": 1,
215
  "trial_name": null,
216
  "trial_params": null