leixa commited on
Commit
e678d55
1 Parent(s): 3d5df0b

Training in progress, step 231, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2791b511c4630b21fd991533625ec1ec52da3e5cc1609da7a4c2cfedc1bcba6d
3
  size 191968
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c03cc5a79be0ba30ad0320be99eba132052fb920252f4f639abfb0267ea350a0
3
  size 191968
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a884f4320934e16da5143deade141b5396382f6dfdc0784d68105ea5d71bc6b2
3
  size 253144
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c6554ffe7014b0c305fc27e46e009d8002b5e22780cf3137e2842ed01d2e8fb0
3
  size 253144
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fec363189963dc133232a1202530bba3901933ae6ee2483645557d8ee2922117
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2b3a05bdcba00a37dc9e6fb656aa0abeeb8eb45eca58cc80e1b27558770bab32
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:16a7801db1aa9f181cf78d5699e3a7862ab42bf9c452e31cb54501196abe18a0
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2fc5b95cc0db21cf56336a583c8dd7fb1d3824838e4cb847d2705abaaeeca402
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 2.5438066465256797,
5
  "eval_steps": 21,
6
- "global_step": 210,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -585,6 +585,63 @@
585
  "eval_samples_per_second": 532.826,
586
  "eval_steps_per_second": 68.506,
587
  "step": 210
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
588
  }
589
  ],
590
  "logging_steps": 3,
@@ -604,7 +661,7 @@
604
  "attributes": {}
605
  }
606
  },
607
- "total_flos": 23425638727680.0,
608
  "train_batch_size": 8,
609
  "trial_name": null,
610
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 2.797583081570997,
5
  "eval_steps": 21,
6
+ "global_step": 231,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
585
  "eval_samples_per_second": 532.826,
586
  "eval_steps_per_second": 68.506,
587
  "step": 210
588
+ },
589
+ {
590
+ "epoch": 2.580060422960725,
591
+ "grad_norm": 0.17420655488967896,
592
+ "learning_rate": 5.494517259623477e-06,
593
+ "loss": 10.1712,
594
+ "step": 213
595
+ },
596
+ {
597
+ "epoch": 2.61631419939577,
598
+ "grad_norm": 0.23611021041870117,
599
+ "learning_rate": 4.630749768552589e-06,
600
+ "loss": 10.1776,
601
+ "step": 216
602
+ },
603
+ {
604
+ "epoch": 2.6525679758308156,
605
+ "grad_norm": 0.21432390809059143,
606
+ "learning_rate": 3.837524928243774e-06,
607
+ "loss": 10.1729,
608
+ "step": 219
609
+ },
610
+ {
611
+ "epoch": 2.688821752265861,
612
+ "grad_norm": 0.27384114265441895,
613
+ "learning_rate": 3.116076089096265e-06,
614
+ "loss": 10.1782,
615
+ "step": 222
616
+ },
617
+ {
618
+ "epoch": 2.7250755287009065,
619
+ "grad_norm": 0.18094521760940552,
620
+ "learning_rate": 2.4675250001635232e-06,
621
+ "loss": 10.1835,
622
+ "step": 225
623
+ },
624
+ {
625
+ "epoch": 2.7613293051359515,
626
+ "grad_norm": 0.25660476088523865,
627
+ "learning_rate": 1.892880064994934e-06,
628
+ "loss": 10.179,
629
+ "step": 228
630
+ },
631
+ {
632
+ "epoch": 2.797583081570997,
633
+ "grad_norm": 0.23092766106128693,
634
+ "learning_rate": 1.3930347737136196e-06,
635
+ "loss": 10.1815,
636
+ "step": 231
637
+ },
638
+ {
639
+ "epoch": 2.797583081570997,
640
+ "eval_loss": 10.169166564941406,
641
+ "eval_runtime": 0.2602,
642
+ "eval_samples_per_second": 538.147,
643
+ "eval_steps_per_second": 69.19,
644
+ "step": 231
645
  }
646
  ],
647
  "logging_steps": 3,
 
661
  "attributes": {}
662
  }
663
  },
664
+ "total_flos": 25768202600448.0,
665
  "train_batch_size": 8,
666
  "trial_name": null,
667
  "trial_params": null