penglingwei committed on
Commit
c695799
1 Parent(s): 4c605a7

Training in progress, step 32476, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7d064f2332ced8d589411b314d74aa64dbff4eaac037d31068824e3f0f523bb4
3
  size 355967616
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:33340270abd0409af6d05e61ef0599813be5c1844119d7769c7068db632d1d9b
3
  size 355967616
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2ab97c4aa3f15e2600cfe333e231ff19c0e23e67658b81c4a2261ae5c1af8966
3
  size 712029114
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:be70fd8ab879b6d39326b6d12348412b03e6ccfb19399bc1393d6d85e5db7863
3
  size 712029114
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cae11df3d2278a8ff4a04dff28369bcaea0cb0b7c145bd92f25a776466f1de16
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:148ed913b32e8721d0e239a3585a6304177384e08458813be1eba0a7d8567889
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7303cf337b0f3230e3aa10fc27672d49524c56a76ad0cc51d0583606599733d7
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ff90769edad154332f3106098071f15e0ed13400ddcc176f41814d521feb3838
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.9976444506027435,
5
  "eval_steps": 500,
6
- "global_step": 32400,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -22687,6 +22687,55 @@
22687
  "learning_rate": 9.773355068215528e-10,
22688
  "loss": 0.6151,
22689
  "step": 32400
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22690
  }
22691
  ],
22692
  "logging_steps": 10,
@@ -22701,12 +22750,12 @@
22701
  "should_evaluate": false,
22702
  "should_log": false,
22703
  "should_save": true,
22704
- "should_training_stop": false
22705
  },
22706
  "attributes": {}
22707
  }
22708
  },
22709
- "total_flos": 2.1672606302208e+18,
22710
  "train_batch_size": 8,
22711
  "trial_name": null,
22712
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.9999846042523055,
5
  "eval_steps": 500,
6
+ "global_step": 32476,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
22687
  "learning_rate": 9.773355068215528e-10,
22688
  "loss": 0.6151,
22689
  "step": 32400
22690
+ },
22691
+ {
22692
+ "epoch": 0.9979523655566332,
22693
+ "grad_norm": 13.585440635681152,
22694
+ "learning_rate": 7.722167650797696e-10,
22695
+ "loss": 0.4538,
22696
+ "step": 32410
22697
+ },
22698
+ {
22699
+ "epoch": 0.998260280510523,
22700
+ "grad_norm": 16.41465187072754,
22701
+ "learning_rate": 5.912291741405618e-10,
22702
+ "loss": 0.3448,
22703
+ "step": 32420
22704
+ },
22705
+ {
22706
+ "epoch": 0.9985681954644128,
22707
+ "grad_norm": 17.536109924316406,
22708
+ "learning_rate": 4.343729087002979e-10,
22709
+ "loss": 0.3335,
22710
+ "step": 32430
22711
+ },
22712
+ {
22713
+ "epoch": 0.9988761104183025,
22714
+ "grad_norm": 17.59690284729004,
22715
+ "learning_rate": 3.016481201739696e-10,
22716
+ "loss": 0.539,
22717
+ "step": 32440
22718
+ },
22719
+ {
22720
+ "epoch": 0.9991840253721922,
22721
+ "grad_norm": 23.103965759277344,
22722
+ "learning_rate": 1.930549366757628e-10,
22723
+ "loss": 0.5025,
22724
+ "step": 32450
22725
+ },
22726
+ {
22727
+ "epoch": 0.9994919403260819,
22728
+ "grad_norm": 13.974841117858887,
22729
+ "learning_rate": 1.0859346302460882e-10,
22730
+ "loss": 0.4925,
22731
+ "step": 32460
22732
+ },
22733
+ {
22734
+ "epoch": 0.9997998552799716,
22735
+ "grad_norm": 12.541289329528809,
22736
+ "learning_rate": 4.826378075528659e-11,
22737
+ "loss": 0.5266,
22738
+ "step": 32470
22739
  }
22740
  ],
22741
  "logging_steps": 10,
 
22750
  "should_evaluate": false,
22751
  "should_log": false,
22752
  "should_save": true,
22753
+ "should_training_stop": true
22754
  },
22755
  "attributes": {}
22756
  }
22757
  },
22758
+ "total_flos": 2.172344327995392e+18,
22759
  "train_batch_size": 8,
22760
  "trial_name": null,
22761
  "trial_params": null