leixa commited on
Commit
a8b4ffe
·
verified ·
1 Parent(s): dee3cce

Training in progress, step 84, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:770140205d2dac43d3cffa97db7b856382360e2b5d689600fb6a190f3a214871
3
  size 191968
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:31950ae9617f892cd1a2f1b3497a6c89d63f13022f3b417331eb374c2d0a0aa6
3
  size 191968
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1a359e38865ee20b10b8cc286fbdbd671be6b4685839e72323ccb7768a4790a4
3
  size 253144
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9882a0339d70f771ed47074c510984676dd1f90485341b9d71c79a6578d4a801
3
  size 253144
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ff00fff9cbfcd118e54c77073688cb2b84440267aaa2d4e090ca843731701c95
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:611585759af06adf1fef356a0b29f640225adbdcc1a21f2c2557f30972d4755b
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:731f9a38a306fb54b040f8655d8ca8de9e109511292676024e32dd381563f07a
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7c082198971fde7d345d487d7f9a561afc194de17619b9de850d57aaedab580c
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.7613293051359517,
5
  "eval_steps": 21,
6
- "global_step": 63,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -186,6 +186,63 @@
186
  "eval_samples_per_second": 532.785,
187
  "eval_steps_per_second": 68.501,
188
  "step": 63
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
189
  }
190
  ],
191
  "logging_steps": 3,
@@ -205,7 +262,7 @@
205
  "attributes": {}
206
  }
207
  },
208
- "total_flos": 7027691618304.0,
209
  "train_batch_size": 8,
210
  "trial_name": null,
211
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.0181268882175227,
5
  "eval_steps": 21,
6
+ "global_step": 84,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
186
  "eval_samples_per_second": 532.785,
187
  "eval_steps_per_second": 68.501,
188
  "step": 63
189
+ },
190
+ {
191
+ "epoch": 0.797583081570997,
192
+ "grad_norm": 0.2844769358634949,
193
+ "learning_rate": 8.705445194510868e-05,
194
+ "loss": 10.3075,
195
+ "step": 66
196
+ },
197
+ {
198
+ "epoch": 0.8338368580060423,
199
+ "grad_norm": 0.2514300048351288,
200
+ "learning_rate": 8.570216784695637e-05,
201
+ "loss": 10.3049,
202
+ "step": 69
203
+ },
204
+ {
205
+ "epoch": 0.8700906344410876,
206
+ "grad_norm": 0.24744316935539246,
207
+ "learning_rate": 8.429437201905254e-05,
208
+ "loss": 10.295,
209
+ "step": 72
210
+ },
211
+ {
212
+ "epoch": 0.9063444108761329,
213
+ "grad_norm": 0.21623125672340393,
214
+ "learning_rate": 8.283325338118153e-05,
215
+ "loss": 10.2903,
216
+ "step": 75
217
+ },
218
+ {
219
+ "epoch": 0.9425981873111783,
220
+ "grad_norm": 0.21527834236621857,
221
+ "learning_rate": 8.132108376241849e-05,
222
+ "loss": 10.2817,
223
+ "step": 78
224
+ },
225
+ {
226
+ "epoch": 0.9788519637462235,
227
+ "grad_norm": 0.2678958475589752,
228
+ "learning_rate": 7.97602143687623e-05,
229
+ "loss": 10.2804,
230
+ "step": 81
231
+ },
232
+ {
233
+ "epoch": 1.0181268882175227,
234
+ "grad_norm": 0.20537346601486206,
235
+ "learning_rate": 7.815307212734888e-05,
236
+ "loss": 11.7642,
237
+ "step": 84
238
+ },
239
+ {
240
+ "epoch": 1.0181268882175227,
241
+ "eval_loss": 10.264846801757812,
242
+ "eval_runtime": 0.2605,
243
+ "eval_samples_per_second": 537.448,
244
+ "eval_steps_per_second": 69.1,
245
+ "step": 84
246
  }
247
  ],
248
  "logging_steps": 3,
 
262
  "attributes": {}
263
  }
264
  },
265
+ "total_flos": 9370255491072.0,
266
  "train_batch_size": 8,
267
  "trial_name": null,
268
  "trial_params": null