penglingwei committed on
Commit 09e135a
1 Parent(s): 18b2222

Training in progress, step 32400, checkpoint

last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:d0ba60c26c51cf355e00f36b738194907999ee45ca4f8785c8e31a7623c98e9b
+ oid sha256:7d064f2332ced8d589411b314d74aa64dbff4eaac037d31068824e3f0f523bb4
  size 355967616
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:05c91ffc2b10c397203e4c7dead597b4287d25e577c0e6df9529c30b50b8aa2a
+ oid sha256:2ab97c4aa3f15e2600cfe333e231ff19c0e23e67658b81c4a2261ae5c1af8966
  size 712029114
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:10bb518808924f8ec5f4cbecfd83ac3913ab2f1042a575afcab54bd6c4cbfb73
+ oid sha256:cae11df3d2278a8ff4a04dff28369bcaea0cb0b7c145bd92f25a776466f1de16
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:a70f42d06afa8cde3bcd311e92f70e0210525048300805011ba7af6d0c326c99
+ oid sha256:7303cf337b0f3230e3aa10fc27672d49524c56a76ad0cc51d0583606599733d7
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
  {
  "best_metric": null,
  "best_model_checkpoint": null,
- "epoch": 0.9945653010638462,
+ "epoch": 0.9976444506027435,
  "eval_steps": 500,
- "global_step": 32300,
+ "global_step": 32400,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -22617,6 +22617,76 @@
  "learning_rate": 4.355681129189493e-09,
  "loss": 0.3696,
  "step": 32300
+ },
+ {
+ "epoch": 0.9948732160177359,
+ "grad_norm": 17.661426544189453,
+ "learning_rate": 3.909265612511703e-09,
+ "loss": 0.4423,
+ "step": 32310
+ },
+ {
+ "epoch": 0.9951811309716256,
+ "grad_norm": 25.57279396057129,
+ "learning_rate": 3.486978218514292e-09,
+ "loss": 0.3579,
+ "step": 32320
+ },
+ {
+ "epoch": 0.9954890459255153,
+ "grad_norm": 14.529350280761719,
+ "learning_rate": 3.0888193548239687e-09,
+ "loss": 0.3905,
+ "step": 32330
+ },
+ {
+ "epoch": 0.9957969608794051,
+ "grad_norm": 13.980672836303711,
+ "learning_rate": 2.714789405772189e-09,
+ "loss": 0.2954,
+ "step": 32340
+ },
+ {
+ "epoch": 0.9961048758332949,
+ "grad_norm": 15.562602043151855,
+ "learning_rate": 2.364888732403481e-09,
+ "loss": 0.2862,
+ "step": 32350
+ },
+ {
+ "epoch": 0.9964127907871846,
+ "grad_norm": 13.155916213989258,
+ "learning_rate": 2.039117672464341e-09,
+ "loss": 0.4238,
+ "step": 32360
+ },
+ {
+ "epoch": 0.9967207057410743,
+ "grad_norm": 10.378691673278809,
+ "learning_rate": 1.7374765404143401e-09,
+ "loss": 0.4403,
+ "step": 32370
+ },
+ {
+ "epoch": 0.997028620694964,
+ "grad_norm": 11.533722877502441,
+ "learning_rate": 1.4599656274261186e-09,
+ "loss": 0.4543,
+ "step": 32380
+ },
+ {
+ "epoch": 0.9973365356488538,
+ "grad_norm": 15.00934886932373,
+ "learning_rate": 1.2065852013659618e-09,
+ "loss": 0.5758,
+ "step": 32390
+ },
+ {
+ "epoch": 0.9976444506027435,
+ "grad_norm": 19.549671173095703,
+ "learning_rate": 9.773355068215528e-10,
+ "loss": 0.6151,
+ "step": 32400
  }
  ],
  "logging_steps": 10,
@@ -22636,7 +22706,7 @@
  "attributes": {}
  }
  },
- "total_flos": 2.1605715542016e+18,
+ "total_flos": 2.1672606302208e+18,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
 