kokovova committed · Commit 514fc8a · verified · 1 Parent(s): 00e4984

Training in progress, step 50, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6c8001888663b3a762e1e077d6e0c04c2566f9678d2ca50bbb83da88f6474a70
+oid sha256:4cab650935c0482531612b6d928e48d9dae71f5b0cc6d335d35ecfe4d2e682fd
 size 1521616
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7da845cda640c4b07e2beed04ca032a0d6a980bb12ef105bc1ce7ad5126445eb
+oid sha256:a23578b9d30499d14bd7284f3c7cc52614b791a0d9621c824b873601faa2d12f
 size 3108666
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3035ebf7301b6dda2eedd90adafea5c079c473a1e2b75bde30bcba2baea1fc16
+oid sha256:d2dacbc91d0d50e41b049f70a84d5b45ce05c0358d13151fa9f001452bc37899
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:99fc9c0ec571f76cf9b6d1229601c5173899cd18104e487c5627f5f4c56c6e8a
+oid sha256:46fa8207e86dee7d50b0ab12f1dd18c4426e8c65d06f97f8b2bd004a747e9cfa
 size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.01938548027527382,
+  "epoch": 0.03877096055054764,
   "eval_steps": 25,
-  "global_step": 25,
+  "global_step": 50,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -198,6 +198,189 @@
       "eval_samples_per_second": 62.467,
       "eval_steps_per_second": 31.233,
       "step": 25
+    },
+    {
+      "epoch": 0.020160899486284772,
+      "grad_norm": 15.573179244995117,
+      "learning_rate": 7.68649804173412e-05,
+      "loss": 6.3441,
+      "step": 26
+    },
+    {
+      "epoch": 0.020936318697295727,
+      "grad_norm": 15.074617385864258,
+      "learning_rate": 7.500000000000001e-05,
+      "loss": 6.2842,
+      "step": 27
+    },
+    {
+      "epoch": 0.021711737908306678,
+      "grad_norm": 15.864977836608887,
+      "learning_rate": 7.308743066175172e-05,
+      "loss": 5.7357,
+      "step": 28
+    },
+    {
+      "epoch": 0.02248715711931763,
+      "grad_norm": 19.382728576660156,
+      "learning_rate": 7.113091308703498e-05,
+      "loss": 6.0517,
+      "step": 29
+    },
+    {
+      "epoch": 0.023262576330328584,
+      "grad_norm": 18.489917755126953,
+      "learning_rate": 6.91341716182545e-05,
+      "loss": 5.7561,
+      "step": 30
+    },
+    {
+      "epoch": 0.024037995541339535,
+      "grad_norm": 11.977217674255371,
+      "learning_rate": 6.710100716628344e-05,
+      "loss": 5.3761,
+      "step": 31
+    },
+    {
+      "epoch": 0.02481341475235049,
+      "grad_norm": 12.359403610229492,
+      "learning_rate": 6.503528997521366e-05,
+      "loss": 5.6858,
+      "step": 32
+    },
+    {
+      "epoch": 0.02558883396336144,
+      "grad_norm": 17.48925018310547,
+      "learning_rate": 6.294095225512603e-05,
+      "loss": 5.4112,
+      "step": 33
+    },
+    {
+      "epoch": 0.026364253174372396,
+      "grad_norm": 12.810553550720215,
+      "learning_rate": 6.0821980696905146e-05,
+      "loss": 5.1038,
+      "step": 34
+    },
+    {
+      "epoch": 0.027139672385383347,
+      "grad_norm": 11.41131591796875,
+      "learning_rate": 5.868240888334653e-05,
+      "loss": 5.1019,
+      "step": 35
+    },
+    {
+      "epoch": 0.0279150915963943,
+      "grad_norm": 11.404109001159668,
+      "learning_rate": 5.6526309611002594e-05,
+      "loss": 5.0096,
+      "step": 36
+    },
+    {
+      "epoch": 0.028690510807405253,
+      "grad_norm": 10.934562683105469,
+      "learning_rate": 5.435778713738292e-05,
+      "loss": 4.8367,
+      "step": 37
+    },
+    {
+      "epoch": 0.029465930018416207,
+      "grad_norm": 11.160900115966797,
+      "learning_rate": 5.218096936826681e-05,
+      "loss": 5.009,
+      "step": 38
+    },
+    {
+      "epoch": 0.03024134922942716,
+      "grad_norm": 9.400223731994629,
+      "learning_rate": 5e-05,
+      "loss": 5.121,
+      "step": 39
+    },
+    {
+      "epoch": 0.031016768440438113,
+      "grad_norm": 9.578547477722168,
+      "learning_rate": 4.781903063173321e-05,
+      "loss": 4.9154,
+      "step": 40
+    },
+    {
+      "epoch": 0.03179218765144907,
+      "grad_norm": 8.08479118347168,
+      "learning_rate": 4.564221286261709e-05,
+      "loss": 4.4877,
+      "step": 41
+    },
+    {
+      "epoch": 0.03256760686246002,
+      "grad_norm": 9.323592185974121,
+      "learning_rate": 4.347369038899744e-05,
+      "loss": 4.7919,
+      "step": 42
+    },
+    {
+      "epoch": 0.03334302607347097,
+      "grad_norm": 12.592964172363281,
+      "learning_rate": 4.131759111665349e-05,
+      "loss": 4.9646,
+      "step": 43
+    },
+    {
+      "epoch": 0.03411844528448192,
+      "grad_norm": 8.313202857971191,
+      "learning_rate": 3.917801930309486e-05,
+      "loss": 4.5304,
+      "step": 44
+    },
+    {
+      "epoch": 0.03489386449549287,
+      "grad_norm": 9.623037338256836,
+      "learning_rate": 3.705904774487396e-05,
+      "loss": 4.5277,
+      "step": 45
+    },
+    {
+      "epoch": 0.03566928370650383,
+      "grad_norm": 6.551573753356934,
+      "learning_rate": 3.4964710024786354e-05,
+      "loss": 4.7194,
+      "step": 46
+    },
+    {
+      "epoch": 0.03644470291751478,
+      "grad_norm": 9.742829322814941,
+      "learning_rate": 3.289899283371657e-05,
+      "loss": 4.4445,
+      "step": 47
+    },
+    {
+      "epoch": 0.03722012212852573,
+      "grad_norm": 6.696950435638428,
+      "learning_rate": 3.086582838174551e-05,
+      "loss": 4.3009,
+      "step": 48
+    },
+    {
+      "epoch": 0.037995541339536684,
+      "grad_norm": 6.955257415771484,
+      "learning_rate": 2.886908691296504e-05,
+      "loss": 4.2683,
+      "step": 49
+    },
+    {
+      "epoch": 0.03877096055054764,
+      "grad_norm": 11.268085479736328,
+      "learning_rate": 2.6912569338248315e-05,
+      "loss": 4.3482,
+      "step": 50
+    },
+    {
+      "epoch": 0.03877096055054764,
+      "eval_loss": 4.515497207641602,
+      "eval_runtime": 17.341,
+      "eval_samples_per_second": 62.626,
+      "eval_steps_per_second": 31.313,
+      "step": 50
     }
   ],
   "logging_steps": 1,
@@ -217,7 +400,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 14500862361600.0,
+  "total_flos": 29001724723200.0,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null