leixa committed on
Commit eab4a2b · verified · 1 Parent(s): 128fbcb

Training in progress, step 252, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2bc6d404f7fd69fe907c7d54d9d2ba1b802a5e0867a87634f546ea28477a5f17
+oid sha256:4c4d1afd543174e3ab9a2fe34af04ea8af62f4a7862fc78925dd62a536e9feaf
 size 201892112
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:32ee2b6ae3076a559fcb80a8ff9862fd211c95d781fac1e407e8f1030ac38660
+oid sha256:c67f2182ec15d55237558a16b27d2d849447fdca6258247107fbf31c03b1c68d
 size 102864548
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2a58fcd0d4e63bda996273295cf19629f45f15b4f31eb5cecaee6502ebb86992
+oid sha256:fa5a85cd5fb1a43d1a3608369f309632ab765e2fc493019f6bf624d685f85850
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4bd95a1db0e917ddf11b12a343f06e907fcec4b81104002e2471b4778587b465
+oid sha256:ee19ddad9c4c375a1de2d74fb4c1cf5e15d36c1ed47a2cb80f7cb0fbacb3b29e
 size 1064
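
Each of the pointer files above follows the git-lfs pointer format (version / oid sha256:&lt;hex&gt; / size). The following is not part of the commit, only a minimal sketch of how one might confirm that a locally downloaded blob matches the new pointer; verify_lfs_pointer is a hypothetical helper and the paths are placeholders.

```python
import hashlib
import os

def verify_lfs_pointer(pointer_text: str, blob_path: str) -> bool:
    """Return True if the file at blob_path matches the oid/size recorded
    in a git-lfs pointer like the ones diffed above."""
    # Pointer lines look like "oid sha256:<hex>" and "size <bytes>".
    fields = dict(line.split(" ", 1) for line in pointer_text.strip().splitlines())
    expected_oid = fields["oid"].split(":", 1)[1]
    expected_size = int(fields["size"])

    if os.path.getsize(blob_path) != expected_size:
        return False

    sha = hashlib.sha256()
    with open(blob_path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            sha.update(chunk)
    return sha.hexdigest() == expected_oid


if __name__ == "__main__":
    pointer = (
        "version https://git-lfs.github.com/spec/v1\n"
        "oid sha256:ee19ddad9c4c375a1de2d74fb4c1cf5e15d36c1ed47a2cb80f7cb0fbacb3b29e\n"
        "size 1064\n"
    )
    # Assumes the checkpoint blob was downloaded to this local path.
    print(verify_lfs_pointer(pointer, "last-checkpoint/scheduler.pt"))
```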
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.04402054292002935,
+  "epoch": 0.05282465150403522,
   "eval_steps": 42,
-  "global_step": 210,
+  "global_step": 252,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -545,6 +545,112 @@
       "eval_samples_per_second": 48.679,
       "eval_steps_per_second": 6.089,
       "step": 210
+    },
+    {
+      "epoch": 0.04464940781888691,
+      "grad_norm": 0.3846144676208496,
+      "learning_rate": 6.330184227833376e-05,
+      "loss": 1.0922,
+      "step": 213
+    },
+    {
+      "epoch": 0.04527827271774447,
+      "grad_norm": 0.37346020340919495,
+      "learning_rate": 6.237238428024572e-05,
+      "loss": 1.0727,
+      "step": 216
+    },
+    {
+      "epoch": 0.045907137616602034,
+      "grad_norm": 0.3563274145126343,
+      "learning_rate": 6.143834918526527e-05,
+      "loss": 1.144,
+      "step": 219
+    },
+    {
+      "epoch": 0.0465360025154596,
+      "grad_norm": 0.3684130907058716,
+      "learning_rate": 6.0500082534642464e-05,
+      "loss": 1.0526,
+      "step": 222
+    },
+    {
+      "epoch": 0.047164867414317156,
+      "grad_norm": 0.37623313069343567,
+      "learning_rate": 5.955793143506863e-05,
+      "loss": 1.0694,
+      "step": 225
+    },
+    {
+      "epoch": 0.04779373231317472,
+      "grad_norm": 0.335705429315567,
+      "learning_rate": 5.861224443026595e-05,
+      "loss": 1.0766,
+      "step": 228
+    },
+    {
+      "epoch": 0.04842259721203228,
+      "grad_norm": 0.35654446482658386,
+      "learning_rate": 5.766337137204579e-05,
+      "loss": 1.091,
+      "step": 231
+    },
+    {
+      "epoch": 0.04905146211088984,
+      "grad_norm": 0.33855140209198,
+      "learning_rate": 5.6711663290882776e-05,
+      "loss": 1.0715,
+      "step": 234
+    },
+    {
+      "epoch": 0.04968032700974741,
+      "grad_norm": 0.3658079206943512,
+      "learning_rate": 5.575747226605298e-05,
+      "loss": 1.0915,
+      "step": 237
+    },
+    {
+      "epoch": 0.050309191908604965,
+      "grad_norm": 0.3918761610984802,
+      "learning_rate": 5.480115129538409e-05,
+      "loss": 1.073,
+      "step": 240
+    },
+    {
+      "epoch": 0.05093805680746253,
+      "grad_norm": 0.37080520391464233,
+      "learning_rate": 5.384305416466584e-05,
+      "loss": 1.0234,
+      "step": 243
+    },
+    {
+      "epoch": 0.051566921706320094,
+      "grad_norm": 0.315390020608902,
+      "learning_rate": 5.288353531676873e-05,
+      "loss": 1.0456,
+      "step": 246
+    },
+    {
+      "epoch": 0.05219578660517765,
+      "grad_norm": 0.34985366463661194,
+      "learning_rate": 5.192294972051992e-05,
+      "loss": 1.0505,
+      "step": 249
+    },
+    {
+      "epoch": 0.05282465150403522,
+      "grad_norm": 0.35391372442245483,
+      "learning_rate": 5.0961652739384356e-05,
+      "loss": 1.0728,
+      "step": 252
+    },
+    {
+      "epoch": 0.05282465150403522,
+      "eval_loss": 1.0837104320526123,
+      "eval_runtime": 164.9887,
+      "eval_samples_per_second": 48.7,
+      "eval_steps_per_second": 6.091,
+      "step": 252
     }
   ],
   "logging_steps": 3,
@@ -564,7 +670,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 4.479610490191872e+16,
+  "total_flos": 5.375532588230246e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null