dimasik87 commited on
Commit
cc79b64
1 Parent(s): a6f1209

Training in progress, step 30, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6c6338372961508d0e6e53f4b425dedb3b1d009925d6beed6c8fe137d15a8822
3
  size 83945296
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:09c315f50b22f72dd14cba2058fe33721bf21668ba86eb591bd3ac36c9d62b32
3
  size 83945296
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c43bc5142208879f6c21142d760abc96f24db666fb369b9d6d73834b699e85a2
3
  size 168149074
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5706597b330e90541512aede454908ec2b68248e9b4ce53e5319deaf4b4c3be1
3
  size 168149074
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ca4ef0be595a40805daa15596cdef4ec088c37a282b79824e24568848d376f7f
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d3e077675f66773a81cbf019fe747215a64463738c7179426c26ea0177d7f67d
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5d605401690d7669ff16aeaca6820cbd8d0d605afe748c51045ce90888810a22
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:baae720e33260fead254c87141d85e241b839ae924033bfd9652fb777f3f1bf0
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.0003387741879476848,
5
  "eval_steps": 5,
6
- "global_step": 20,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -187,6 +187,92 @@
187
  "eval_samples_per_second": 9.697,
188
  "eval_steps_per_second": 4.849,
189
  "step": 20
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
190
  }
191
  ],
192
  "logging_steps": 1,
@@ -206,7 +292,7 @@
206
  "attributes": {}
207
  }
208
  },
209
- "total_flos": 2.804271657517056e+16,
210
  "train_batch_size": 2,
211
  "trial_name": null,
212
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.0005081612819215272,
5
  "eval_steps": 5,
6
+ "global_step": 30,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
187
  "eval_samples_per_second": 9.697,
188
  "eval_steps_per_second": 4.849,
189
  "step": 20
190
+ },
191
+ {
192
+ "epoch": 0.000355712897345069,
193
+ "grad_norm": 17.683870315551758,
194
+ "learning_rate": 0.00016494480483301836,
195
+ "loss": 17.1021,
196
+ "step": 21
197
+ },
198
+ {
199
+ "epoch": 0.00037265160674245326,
200
+ "grad_norm": 18.684045791625977,
201
+ "learning_rate": 0.00015877852522924732,
202
+ "loss": 15.9178,
203
+ "step": 22
204
+ },
205
+ {
206
+ "epoch": 0.0003895903161398375,
207
+ "grad_norm": 20.173887252807617,
208
+ "learning_rate": 0.0001522498564715949,
209
+ "loss": 17.3176,
210
+ "step": 23
211
+ },
212
+ {
213
+ "epoch": 0.00040652902553722173,
214
+ "grad_norm": 19.331239700317383,
215
+ "learning_rate": 0.00014539904997395468,
216
+ "loss": 16.0941,
217
+ "step": 24
218
+ },
219
+ {
220
+ "epoch": 0.000423467734934606,
221
+ "grad_norm": 19.496095657348633,
222
+ "learning_rate": 0.000138268343236509,
223
+ "loss": 15.8816,
224
+ "step": 25
225
+ },
226
+ {
227
+ "epoch": 0.000423467734934606,
228
+ "eval_loss": 2.0629289150238037,
229
+ "eval_runtime": 5134.9215,
230
+ "eval_samples_per_second": 9.682,
231
+ "eval_steps_per_second": 4.841,
232
+ "step": 25
233
+ },
234
+ {
235
+ "epoch": 0.0004404064443319902,
236
+ "grad_norm": 33.5986213684082,
237
+ "learning_rate": 0.00013090169943749476,
238
+ "loss": 15.7073,
239
+ "step": 26
240
+ },
241
+ {
242
+ "epoch": 0.00045734515372937445,
243
+ "grad_norm": 16.573434829711914,
244
+ "learning_rate": 0.00012334453638559057,
245
+ "loss": 14.8196,
246
+ "step": 27
247
+ },
248
+ {
249
+ "epoch": 0.0004742838631267587,
250
+ "grad_norm": 20.38245391845703,
251
+ "learning_rate": 0.0001156434465040231,
252
+ "loss": 16.6963,
253
+ "step": 28
254
+ },
255
+ {
256
+ "epoch": 0.0004912225725241429,
257
+ "grad_norm": 16.84417724609375,
258
+ "learning_rate": 0.0001078459095727845,
259
+ "loss": 16.4005,
260
+ "step": 29
261
+ },
262
+ {
263
+ "epoch": 0.0005081612819215272,
264
+ "grad_norm": 20.52333641052246,
265
+ "learning_rate": 0.0001,
266
+ "loss": 15.0153,
267
+ "step": 30
268
+ },
269
+ {
270
+ "epoch": 0.0005081612819215272,
271
+ "eval_loss": 2.041257858276367,
272
+ "eval_runtime": 5138.2338,
273
+ "eval_samples_per_second": 9.676,
274
+ "eval_steps_per_second": 4.838,
275
+ "step": 30
276
  }
277
  ],
278
  "logging_steps": 1,
 
292
  "attributes": {}
293
  }
294
  },
295
+ "total_flos": 4.206407486275584e+16,
296
  "train_batch_size": 2,
297
  "trial_name": null,
298
  "trial_params": null