dimasik87 committed on
Commit a4ffc71
1 Parent(s): 07f0b03

Training in progress, step 50, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e47a73708d6733a3a4722170bf05dd396f7a42640ce961173c53900d441b9b39
+oid sha256:cf3c2685b5a54ee5eb7eeb51cdf525b938175f023aedc6bff3897f87fd8c7dd6
 size 36981072
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:dd0d2a6c985825dd91d914ba1b5ee0e15153e8022036de5122cfdbb0c81cb3af
+oid sha256:58818c092fa372e81eb272432f3e7b70892874e593e0c6c670fccc79847fbaeb
 size 74188650
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b2e9db38d4c1da0d5d5ef197285e15c71a7f2c782732a3ab7e0af12207119eb5
+oid sha256:69965ee7f2e6825bca03ff6693076ba82a5181c358a0bd4c33454b2e1bc24eb6
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7c792918044964431737f4cb39f3769dbfd230048b1125ac69a6439eb6c8534b
+oid sha256:e69e2b49ea642509f0c688c16fb190b7cf27dac0a18903a5e2d1467d0343d8b8
 size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.046735796699284356,
+  "epoch": 0.05841974587410545,
   "eval_steps": 5,
-  "global_step": 40,
+  "global_step": 50,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -359,6 +359,92 @@
       "eval_samples_per_second": 23.131,
       "eval_steps_per_second": 11.582,
       "step": 40
+    },
+    {
+      "epoch": 0.04790419161676647,
+      "grad_norm": 0.4715648293495178,
+      "learning_rate": 2.3959403439996907e-05,
+      "loss": 1.2266,
+      "step": 41
+    },
+    {
+      "epoch": 0.04907258653424858,
+      "grad_norm": 0.3626469075679779,
+      "learning_rate": 1.9098300562505266e-05,
+      "loss": 1.3755,
+      "step": 42
+    },
+    {
+      "epoch": 0.050240981451730685,
+      "grad_norm": 0.3921823799610138,
+      "learning_rate": 1.4735983564590783e-05,
+      "loss": 1.3125,
+      "step": 43
+    },
+    {
+      "epoch": 0.05140937636921279,
+      "grad_norm": 0.35432127118110657,
+      "learning_rate": 1.0899347581163221e-05,
+      "loss": 1.2351,
+      "step": 44
+    },
+    {
+      "epoch": 0.052577771286694906,
+      "grad_norm": 0.4417038559913635,
+      "learning_rate": 7.612046748871327e-06,
+      "loss": 1.7417,
+      "step": 45
+    },
+    {
+      "epoch": 0.052577771286694906,
+      "eval_loss": 1.1834783554077148,
+      "eval_runtime": 31.2195,
+      "eval_samples_per_second": 23.095,
+      "eval_steps_per_second": 11.563,
+      "step": 45
+    },
+    {
+      "epoch": 0.053746166204177014,
+      "grad_norm": 0.3598865866661072,
+      "learning_rate": 4.8943483704846475e-06,
+      "loss": 1.4514,
+      "step": 46
+    },
+    {
+      "epoch": 0.05491456112165912,
+      "grad_norm": 0.43709179759025574,
+      "learning_rate": 2.7630079602323442e-06,
+      "loss": 1.5828,
+      "step": 47
+    },
+    {
+      "epoch": 0.05608295603914123,
+      "grad_norm": 0.32742375135421753,
+      "learning_rate": 1.231165940486234e-06,
+      "loss": 1.5362,
+      "step": 48
+    },
+    {
+      "epoch": 0.057251350956623336,
+      "grad_norm": 0.31574079394340515,
+      "learning_rate": 3.0826662668720364e-07,
+      "loss": 1.5458,
+      "step": 49
+    },
+    {
+      "epoch": 0.05841974587410545,
+      "grad_norm": 0.3892696797847748,
+      "learning_rate": 0.0,
+      "loss": 1.5221,
+      "step": 50
+    },
+    {
+      "epoch": 0.05841974587410545,
+      "eval_loss": 1.1832040548324585,
+      "eval_runtime": 31.1605,
+      "eval_samples_per_second": 23.138,
+      "eval_steps_per_second": 11.585,
+      "step": 50
     }
   ],
   "logging_steps": 1,
@@ -373,12 +459,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
-  "total_flos": 1.0442403924148224e+16,
+  "total_flos": 1.3069219818110976e+16,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null