leixa committed
Commit 350f668
1 Parent(s): b127d71

Training in progress, step 252, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:cd409d2cda44386875e2237cbc14387320bb7b2b55cde3c8ddd45a9c2bb03e9a
+ oid sha256:586fd04ed04d760d37f53f41e09542570cc554ebc66067372543dd3f2a963511
  size 671149168
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:4c8061481dc5236ca996649ca5cc63afee81ef7df75bdd47d238714a9ba6b4f2
+ oid sha256:28b2ceac6018dae6fdb4d9ef0f5c8fb480f407b18c678ebe83cbb5b4ab5cb5b7
  size 341314196
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:46bf71438827f27fea3a358fd83e1761732ffba0ae573ca96fc80e490196bc32
+ oid sha256:29b9fe7e09a65746829ba365c53904d26ac9041437bdc46c4d9bdaea8de869a7
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:4bd95a1db0e917ddf11b12a343f06e907fcec4b81104002e2471b4778587b465
+ oid sha256:ee19ddad9c4c375a1de2d74fb4c1cf5e15d36c1ed47a2cb80f7cb0fbacb3b29e
  size 1064
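
All four checkpoint files above are Git LFS objects, so this commit only changes their three-line pointers (spec version, sha256 oid, byte size), not the binary payloads themselves. A minimal Python sketch for checking that a locally downloaded file matches its pointer; the path and helper name are illustrative and not part of this repository.

# Sketch: verify a downloaded LFS object against its pointer values (illustrative).
import hashlib
import os

def verify_lfs_object(path: str, expected_oid: str, expected_size: int) -> bool:
    """Compare a local file's byte size and sha256 digest with the LFS pointer."""
    if os.path.getsize(path) != expected_size:
        return False
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):  # stream in 1 MiB chunks
            digest.update(chunk)
    return digest.hexdigest() == expected_oid

# Values taken from the new adapter_model.safetensors pointer above.
print(verify_lfs_object(
    "last-checkpoint/adapter_model.safetensors",
    "586fd04ed04d760d37f53f41e09542570cc554ebc66067372543dd3f2a963511",
    671149168,
))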
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
  {
  "best_metric": null,
  "best_model_checkpoint": null,
- "epoch": 0.6840390879478827,
+ "epoch": 0.8208469055374593,
  "eval_steps": 42,
- "global_step": 210,
+ "global_step": 252,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -545,6 +545,112 @@
  "eval_samples_per_second": 23.348,
  "eval_steps_per_second": 5.871,
  "step": 210
+ },
+ {
+ "epoch": 0.6938110749185668,
+ "grad_norm": 2.490628719329834,
+ "learning_rate": 6.330184227833376e-05,
+ "loss": 1.4418,
+ "step": 213
+ },
+ {
+ "epoch": 0.7035830618892508,
+ "grad_norm": 2.3577187061309814,
+ "learning_rate": 6.237238428024572e-05,
+ "loss": 1.2653,
+ "step": 216
+ },
+ {
+ "epoch": 0.7133550488599348,
+ "grad_norm": 2.6168723106384277,
+ "learning_rate": 6.143834918526527e-05,
+ "loss": 1.4725,
+ "step": 219
+ },
+ {
+ "epoch": 0.7231270358306189,
+ "grad_norm": 2.343191146850586,
+ "learning_rate": 6.0500082534642464e-05,
+ "loss": 1.3543,
+ "step": 222
+ },
+ {
+ "epoch": 0.7328990228013029,
+ "grad_norm": 2.482631206512451,
+ "learning_rate": 5.955793143506863e-05,
+ "loss": 1.3523,
+ "step": 225
+ },
+ {
+ "epoch": 0.742671009771987,
+ "grad_norm": 2.1662986278533936,
+ "learning_rate": 5.861224443026595e-05,
+ "loss": 1.2848,
+ "step": 228
+ },
+ {
+ "epoch": 0.752442996742671,
+ "grad_norm": 2.496674060821533,
+ "learning_rate": 5.766337137204579e-05,
+ "loss": 1.3919,
+ "step": 231
+ },
+ {
+ "epoch": 0.762214983713355,
+ "grad_norm": 2.5266311168670654,
+ "learning_rate": 5.6711663290882776e-05,
+ "loss": 1.4481,
+ "step": 234
+ },
+ {
+ "epoch": 0.7719869706840391,
+ "grad_norm": 2.154391288757324,
+ "learning_rate": 5.575747226605298e-05,
+ "loss": 1.1874,
+ "step": 237
+ },
+ {
+ "epoch": 0.7817589576547231,
+ "grad_norm": 2.2282350063323975,
+ "learning_rate": 5.480115129538409e-05,
+ "loss": 1.2684,
+ "step": 240
+ },
+ {
+ "epoch": 0.7915309446254072,
+ "grad_norm": 2.377781391143799,
+ "learning_rate": 5.384305416466584e-05,
+ "loss": 1.2627,
+ "step": 243
+ },
+ {
+ "epoch": 0.8013029315960912,
+ "grad_norm": 2.505831480026245,
+ "learning_rate": 5.288353531676873e-05,
+ "loss": 1.2982,
+ "step": 246
+ },
+ {
+ "epoch": 0.8110749185667753,
+ "grad_norm": 2.424248456954956,
+ "learning_rate": 5.192294972051992e-05,
+ "loss": 1.2661,
+ "step": 249
+ },
+ {
+ "epoch": 0.8208469055374593,
+ "grad_norm": 2.3504350185394287,
+ "learning_rate": 5.0961652739384356e-05,
+ "loss": 1.3146,
+ "step": 252
+ },
+ {
+ "epoch": 0.8208469055374593,
+ "eval_loss": 1.3527233600616455,
+ "eval_runtime": 22.1769,
+ "eval_samples_per_second": 23.313,
+ "eval_steps_per_second": 5.862,
+ "step": 252
  }
  ],
  "logging_steps": 3,
@@ -564,7 +670,7 @@
  "attributes": {}
  }
  },
- "total_flos": 1.5839392576831488e+17,
+ "total_flos": 1.9007271092197786e+17,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null