RoyJoy committed
Commit cbe0b82
1 Parent(s): 9d5e045

Training in progress, step 50, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:7f0c9695bce9b5857d2118a769bb184bca737d5dfc022cd6ecfb91f3bd5a604c
+ oid sha256:d264dda68f41fba6eb5dba9a585a510fb9b3d753af81ccec1015005947682235
  size 50624
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:d50bcb5ab4a8a37a1ddf718c2380e06ebea05b459d8ff856c14fda6cff2a26a9
+ oid sha256:1083c750e495b6e6fdab421e66291d4f82cfaa07d10da4f4215fc8d6b1871154
  size 118090
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:ccd5f96a0c54d9c7fc696cd2de657ab87643082dd5b551287b669ca224896764
+ oid sha256:9f699f010db684a3c9930ab741407256ee8eccb1f91864778668a998661bf53e
  size 15024
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:b163f39dfe27dc2b3dbffe608a672c5c24782a59c01d78ba19fb42e537b1c880
+ oid sha256:bb9466cb0f4406b872db94f91c941ba4e6bd8b8f75768065c30577b6082e0df3
  size 15024
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:d3c2e9f653665898599ba92c14fead2834af70cd76abf31d5deed9f0b6254a84
+ oid sha256:91518c078cc87b6d72db505a715dee7376a7f9856a0e02bff458e4546a5c7ee1
  size 15024
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:b04fb1bf3df1d2f6147ab7478673cead0a366e5db2e2dbeb350bb77196eaef95
+ oid sha256:40ce070ec338db07ca72e7709c477efa484ca5e4bf94660875a08d69f708af27
  size 15024
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:5953f228b44971a299d26da55a02076758d03b3520ac0e04c68962ec4a9616bc
+ oid sha256:b1a647e3cb9f019d66be656f671b0f6e626eada7227d358f0d941f9f9001a15e
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
  {
    "best_metric": null,
    "best_model_checkpoint": null,
-   "epoch": 0.7692307692307693,
+   "epoch": 1.5384615384615383,
    "eval_steps": 9,
-   "global_step": 25,
+   "global_step": 50,
    "is_hyper_param_search": false,
    "is_local_process_zero": true,
    "is_world_process_zero": true,
@@ -206,6 +206,205 @@
        "learning_rate": 9.284285880837946e-05,
        "loss": 10.3683,
        "step": 25
+     },
+     {
+       "epoch": 0.8,
+       "grad_norm": 0.3016285002231598,
+       "learning_rate": 9.188429243149824e-05,
+       "loss": 10.3652,
+       "step": 26
+     },
+     {
+       "epoch": 0.8307692307692308,
+       "grad_norm": 0.3102306127548218,
+       "learning_rate": 9.087111692794459e-05,
+       "loss": 10.3612,
+       "step": 27
+     },
+     {
+       "epoch": 0.8307692307692308,
+       "eval_loss": 10.371162414550781,
+       "eval_runtime": 0.0804,
+       "eval_samples_per_second": 1355.392,
+       "eval_steps_per_second": 49.739,
+       "step": 27
+     },
+     {
+       "epoch": 0.8615384615384616,
+       "grad_norm": 0.2973518967628479,
+       "learning_rate": 8.980465328528219e-05,
+       "loss": 10.3562,
+       "step": 28
+     },
+     {
+       "epoch": 0.8923076923076924,
+       "grad_norm": 0.27339646220207214,
+       "learning_rate": 8.868629196864182e-05,
+       "loss": 10.3745,
+       "step": 29
+     },
+     {
+       "epoch": 0.9230769230769231,
+       "grad_norm": 0.30518829822540283,
+       "learning_rate": 8.751749110782012e-05,
+       "loss": 10.3788,
+       "step": 30
+     },
+     {
+       "epoch": 0.9538461538461539,
+       "grad_norm": 0.30676740407943726,
+       "learning_rate": 8.629977459615655e-05,
+       "loss": 10.3631,
+       "step": 31
+     },
+     {
+       "epoch": 0.9846153846153847,
+       "grad_norm": 0.3426137864589691,
+       "learning_rate": 8.503473010366713e-05,
+       "loss": 10.3782,
+       "step": 32
+     },
+     {
+       "epoch": 1.0153846153846153,
+       "grad_norm": 0.43814149498939514,
+       "learning_rate": 8.37240070070257e-05,
+       "loss": 14.7399,
+       "step": 33
+     },
+     {
+       "epoch": 1.0461538461538462,
+       "grad_norm": 0.39517074823379517,
+       "learning_rate": 8.236931423909138e-05,
+       "loss": 11.6581,
+       "step": 34
+     },
+     {
+       "epoch": 1.0769230769230769,
+       "grad_norm": 0.286582887172699,
+       "learning_rate": 8.097241806078615e-05,
+       "loss": 9.8718,
+       "step": 35
+     },
+     {
+       "epoch": 1.1076923076923078,
+       "grad_norm": 0.3339255154132843,
+       "learning_rate": 7.953513975822755e-05,
+       "loss": 9.8388,
+       "step": 36
+     },
+     {
+       "epoch": 1.1076923076923078,
+       "eval_loss": 10.362679481506348,
+       "eval_runtime": 0.0735,
+       "eval_samples_per_second": 1482.077,
+       "eval_steps_per_second": 54.388,
+       "step": 36
+     },
+     {
+       "epoch": 1.1384615384615384,
+       "grad_norm": 0.38969117403030396,
+       "learning_rate": 7.805935326811912e-05,
+       "loss": 9.6292,
+       "step": 37
+     },
+     {
+       "epoch": 1.1692307692307693,
+       "grad_norm": 0.4563569724559784,
+       "learning_rate": 7.654698273449435e-05,
+       "loss": 11.4989,
+       "step": 38
+     },
+     {
+       "epoch": 1.2,
+       "grad_norm": 0.5030809044837952,
+       "learning_rate": 7.500000000000001e-05,
+       "loss": 11.8672,
+       "step": 39
+     },
+     {
+       "epoch": 1.2307692307692308,
+       "grad_norm": 0.32794782519340515,
+       "learning_rate": 7.342042203498951e-05,
+       "loss": 9.4949,
+       "step": 40
+     },
+     {
+       "epoch": 1.2615384615384615,
+       "grad_norm": 0.3771244287490845,
+       "learning_rate": 7.181030830777837e-05,
+       "loss": 8.6339,
+       "step": 41
+     },
+     {
+       "epoch": 1.2923076923076924,
+       "grad_norm": 0.37634986639022827,
+       "learning_rate": 7.017175809949044e-05,
+       "loss": 9.5719,
+       "step": 42
+     },
+     {
+       "epoch": 1.323076923076923,
+       "grad_norm": 0.5357686877250671,
+       "learning_rate": 6.850690776699573e-05,
+       "loss": 13.4886,
+       "step": 43
+     },
+     {
+       "epoch": 1.353846153846154,
+       "grad_norm": 0.41375601291656494,
+       "learning_rate": 6.681792795750875e-05,
+       "loss": 10.2368,
+       "step": 44
+     },
+     {
+       "epoch": 1.3846153846153846,
+       "grad_norm": 0.3804188370704651,
+       "learning_rate": 6.510702077847863e-05,
+       "loss": 8.42,
+       "step": 45
+     },
+     {
+       "epoch": 1.3846153846153846,
+       "eval_loss": 10.35329532623291,
+       "eval_runtime": 0.0829,
+       "eval_samples_per_second": 1314.084,
+       "eval_steps_per_second": 48.223,
+       "step": 45
+     },
+     {
+       "epoch": 1.4153846153846155,
+       "grad_norm": 0.5498846173286438,
+       "learning_rate": 6.337641692646106e-05,
+       "loss": 10.7262,
+       "step": 46
+     },
+     {
+       "epoch": 1.4461538461538461,
+       "grad_norm": 0.4699338972568512,
+       "learning_rate": 6.162837277871553e-05,
+       "loss": 10.9246,
+       "step": 47
+     },
+     {
+       "epoch": 1.476923076923077,
+       "grad_norm": 0.47309648990631104,
+       "learning_rate": 5.9865167451320005e-05,
+       "loss": 10.4618,
+       "step": 48
+     },
+     {
+       "epoch": 1.5076923076923077,
+       "grad_norm": 0.44090601801872253,
+       "learning_rate": 5.808909982763825e-05,
+       "loss": 9.5822,
+       "step": 49
+     },
+     {
+       "epoch": 1.5384615384615383,
+       "grad_norm": 0.6003273129463196,
+       "learning_rate": 5.6302485561014475e-05,
+       "loss": 12.5208,
+       "step": 50
      }
    ],
    "logging_steps": 1,
@@ -225,7 +424,7 @@
        "attributes": {}
      }
    },
-   "total_flos": 5230244659200.0,
+   "total_flos": 10460489318400.0,
    "train_batch_size": 8,
    "trial_name": null,
    "trial_params": null