ardaspear committed
Commit b9f8015 · verified · 1 Parent(s): 89a12de

Training in progress, step 150, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:2ba79555c3c3efdec97edd16076639700328263b318a1b74af3683d0725d022a
+ oid sha256:326269bda607e9436b11dfb9ebcdb78b3ec20eb12cdc84d18d502c96821547f3
  size 6804608
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:b4e708dc4a1735af4b8afbf7be39cec4a4d655c4d0822adab89c56c114b0e68a
+ oid sha256:92ad43829ccf54e5c92412c808a2c70c658b588f9711e5c46e16dba375b39508
  size 3633530
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:c2baafb56fe3a2619cfc0fecd20a068c58a057e4c42cd93b97063a3d1ac12cb5
+ oid sha256:9e1522fdb5ef1b924ce4355d718f38cdadabd1f7abd6649f453c5e8fce157ce1
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:0ddb9588ea654e56e83effcf81a2bc03480954babcf6415cb44d41d3bfb8039f
+ oid sha256:d8ce05761f46e7cf72fb17a02e3a0ca15c9d25ce3babf590eeb40568923b8bac
  size 1064
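
All four files above are stored through Git LFS, so the repository only tracks small pointer files; this commit swaps each pointer's sha256 oid to the new checkpoint blob while the declared byte size stays unchanged. As a rough local sanity check (the file paths below are illustrative, not part of this repository), a pointer can be compared against a downloaded blob roughly like this:

import hashlib

def lfs_pointer_matches(pointer_path: str, blob_path: str) -> bool:
    """Parse a Git LFS pointer (version / oid / size lines) and compare it to a local blob."""
    fields = {}
    with open(pointer_path) as f:
        for line in f:
            key, _, value = line.strip().partition(" ")
            fields[key] = value
    expected_oid = fields["oid"].removeprefix("sha256:")
    expected_size = int(fields["size"])

    # Hash the blob in chunks so memory use stays flat for large checkpoint files.
    sha = hashlib.sha256()
    size = 0
    with open(blob_path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            sha.update(chunk)
            size += len(chunk)
    return sha.hexdigest() == expected_oid and size == expected_size

# Hypothetical usage (paths assumed for illustration):
# lfs_pointer_matches("adapter_model.safetensors.pointer",
#                     "last-checkpoint/adapter_model.safetensors")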
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
  {
  "best_metric": null,
  "best_model_checkpoint": null,
- "epoch": 0.025050100200400802,
+ "epoch": 0.037575150300601205,
  "eval_steps": 50,
- "global_step": 100,
+ "global_step": 150,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -171,6 +171,84 @@
  "eval_samples_per_second": 92.152,
  "eval_steps_per_second": 46.103,
  "step": 100
+ },
+ {
+ "epoch": 0.02630260521042084,
+ "grad_norm": 1.3435356616973877,
+ "learning_rate": 5e-05,
+ "loss": 2.2789,
+ "step": 105
+ },
+ {
+ "epoch": 0.02755511022044088,
+ "grad_norm": 1.5827653408050537,
+ "learning_rate": 4.5871032726383386e-05,
+ "loss": 2.2494,
+ "step": 110
+ },
+ {
+ "epoch": 0.028807615230460923,
+ "grad_norm": 1.6257189512252808,
+ "learning_rate": 4.17702704859633e-05,
+ "loss": 2.1587,
+ "step": 115
+ },
+ {
+ "epoch": 0.03006012024048096,
+ "grad_norm": 1.565990686416626,
+ "learning_rate": 3.772572564296005e-05,
+ "loss": 2.2483,
+ "step": 120
+ },
+ {
+ "epoch": 0.031312625250501,
+ "grad_norm": 1.6951042413711548,
+ "learning_rate": 3.3765026539765834e-05,
+ "loss": 2.1623,
+ "step": 125
+ },
+ {
+ "epoch": 0.03256513026052104,
+ "grad_norm": 1.3800190687179565,
+ "learning_rate": 2.991522876735154e-05,
+ "loss": 2.1412,
+ "step": 130
+ },
+ {
+ "epoch": 0.03381763527054108,
+ "grad_norm": 1.6663275957107544,
+ "learning_rate": 2.6202630348146324e-05,
+ "loss": 2.1782,
+ "step": 135
+ },
+ {
+ "epoch": 0.03507014028056112,
+ "grad_norm": 1.4055718183517456,
+ "learning_rate": 2.2652592093878666e-05,
+ "loss": 2.1664,
+ "step": 140
+ },
+ {
+ "epoch": 0.036322645290581164,
+ "grad_norm": 1.295013189315796,
+ "learning_rate": 1.928936436551661e-05,
+ "loss": 2.0866,
+ "step": 145
+ },
+ {
+ "epoch": 0.037575150300601205,
+ "grad_norm": 2.0372097492218018,
+ "learning_rate": 1.6135921418712956e-05,
+ "loss": 2.0893,
+ "step": 150
+ },
+ {
+ "epoch": 0.037575150300601205,
+ "eval_loss": 2.1908504962921143,
+ "eval_runtime": 18.5604,
+ "eval_samples_per_second": 90.569,
+ "eval_steps_per_second": 45.312,
+ "step": 150
  }
  ],
  "logging_steps": 5,
@@ -190,7 +268,7 @@
  "attributes": {}
  }
  },
- "total_flos": 685853009510400.0,
+ "total_flos": 1028779514265600.0,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null