diaenra committed on
Commit
fbf84cf
1 Parent(s): 97ec63d

Training in progress, step 2899, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a96170c4566c1e7efa993538ecb2474b7056c80c01787e9d312ce31d75111369
3
  size 2145944
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c6ec0ef0bd58107442e53f9af82589272dd28dae4b201ef6aa9c49f0c83bfc1c
3
  size 2145944
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:635f9699d60977f7f49fef0fac4d8dd2e9540f390cee9cca0ae8d57bdec47c46
3
  size 4310020
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e9965916e24cdc2427889fb3540f08506a8da344166dce118b8ba199ea51941c
3
  size 4310020
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a9966c454e255711c675f388ff89a77f36c50534180a271dc2d40e7f43b870bf
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4242e9d0ac065fef7739bb68fca0ee428f925673e7726dd722595d4147173376
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:237b866b1fce3ebf6f30679e5de802610141e1fcea25e04865362188175cac93
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a35fbe8edc635ebc2790ec5a9bb024f4b9252d94d57cb9bc113900a36047bd39
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.9893066574680924,
5
  "eval_steps": 500,
6
- "global_step": 2868,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -20083,6 +20083,223 @@
20083
  "learning_rate": 3.0263108131095566e-08,
20084
  "loss": 9.5316,
20085
  "step": 2868
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20086
  }
20087
  ],
20088
  "logging_steps": 1,
@@ -20097,12 +20314,12 @@
20097
  "should_evaluate": false,
20098
  "should_log": false,
20099
  "should_save": true,
20100
- "should_training_stop": false
20101
  },
20102
  "attributes": {}
20103
  }
20104
  },
20105
- "total_flos": 595349320237056.0,
20106
  "train_batch_size": 4,
20107
  "trial_name": null,
20108
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.0,
5
  "eval_steps": 500,
6
+ "global_step": 2899,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
20083
  "learning_rate": 3.0263108131095566e-08,
20084
  "loss": 9.5316,
20085
  "step": 2868
20086
+ },
20087
+ {
20088
+ "epoch": 0.9896516040013797,
20089
+ "grad_norm": 1.1785590648651123,
20090
+ "learning_rate": 2.8342322313085202e-08,
20091
+ "loss": 9.4722,
20092
+ "step": 2869
20093
+ },
20094
+ {
20095
+ "epoch": 0.9899965505346672,
20096
+ "grad_norm": 1.2561894655227661,
20097
+ "learning_rate": 2.6484489673084257e-08,
20098
+ "loss": 9.5575,
20099
+ "step": 2870
20100
+ },
20101
+ {
20102
+ "epoch": 0.9903414970679545,
20103
+ "grad_norm": 1.1631916761398315,
20104
+ "learning_rate": 2.4689612551553888e-08,
20105
+ "loss": 9.5365,
20106
+ "step": 2871
20107
+ },
20108
+ {
20109
+ "epoch": 0.9906864436012418,
20110
+ "grad_norm": 1.2572925090789795,
20111
+ "learning_rate": 2.2957693209635368e-08,
20112
+ "loss": 9.5596,
20113
+ "step": 2872
20114
+ },
20115
+ {
20116
+ "epoch": 0.9910313901345291,
20117
+ "grad_norm": 1.324566125869751,
20118
+ "learning_rate": 2.1288733829161188e-08,
20119
+ "loss": 9.4521,
20120
+ "step": 2873
20121
+ },
20122
+ {
20123
+ "epoch": 0.9913763366678164,
20124
+ "grad_norm": 1.3486089706420898,
20125
+ "learning_rate": 1.9682736512660617e-08,
20126
+ "loss": 9.4974,
20127
+ "step": 2874
20128
+ },
20129
+ {
20130
+ "epoch": 0.9917212832011039,
20131
+ "grad_norm": 1.174851417541504,
20132
+ "learning_rate": 1.813970328331527e-08,
20133
+ "loss": 9.5033,
20134
+ "step": 2875
20135
+ },
20136
+ {
20137
+ "epoch": 0.9920662297343912,
20138
+ "grad_norm": 1.2707513570785522,
20139
+ "learning_rate": 1.6659636085020192e-08,
20140
+ "loss": 9.4723,
20141
+ "step": 2876
20142
+ },
20143
+ {
20144
+ "epoch": 0.9924111762676785,
20145
+ "grad_norm": 1.474165678024292,
20146
+ "learning_rate": 1.5242536782317242e-08,
20147
+ "loss": 9.4423,
20148
+ "step": 2877
20149
+ },
20150
+ {
20151
+ "epoch": 0.9927561228009658,
20152
+ "grad_norm": 1.3797553777694702,
20153
+ "learning_rate": 1.388840716045059e-08,
20154
+ "loss": 9.4966,
20155
+ "step": 2878
20156
+ },
20157
+ {
20158
+ "epoch": 0.9931010693342532,
20159
+ "grad_norm": 1.326752781867981,
20160
+ "learning_rate": 1.2597248925311222e-08,
20161
+ "loss": 9.5313,
20162
+ "step": 2879
20163
+ },
20164
+ {
20165
+ "epoch": 0.9934460158675406,
20166
+ "grad_norm": 1.3259997367858887,
20167
+ "learning_rate": 1.1369063703475791e-08,
20168
+ "loss": 9.4992,
20169
+ "step": 2880
20170
+ },
20171
+ {
20172
+ "epoch": 0.9937909624008279,
20173
+ "grad_norm": 1.3481284379959106,
20174
+ "learning_rate": 1.0203853042184407e-08,
20175
+ "loss": 9.4431,
20176
+ "step": 2881
20177
+ },
20178
+ {
20179
+ "epoch": 0.9941359089341152,
20180
+ "grad_norm": 1.3297460079193115,
20181
+ "learning_rate": 9.101618409340651e-09,
20182
+ "loss": 9.4599,
20183
+ "step": 2882
20184
+ },
20185
+ {
20186
+ "epoch": 0.9944808554674025,
20187
+ "grad_norm": 1.3250812292099,
20188
+ "learning_rate": 8.06236119351711e-09,
20189
+ "loss": 9.4787,
20190
+ "step": 2883
20191
+ },
20192
+ {
20193
+ "epoch": 0.9948258020006899,
20194
+ "grad_norm": 1.3159713745117188,
20195
+ "learning_rate": 7.086082703949837e-09,
20196
+ "loss": 9.4653,
20197
+ "step": 2884
20198
+ },
20199
+ {
20200
+ "epoch": 0.9951707485339772,
20201
+ "grad_norm": 1.3725025653839111,
20202
+ "learning_rate": 6.172784170532797e-09,
20203
+ "loss": 9.5064,
20204
+ "step": 2885
20205
+ },
20206
+ {
20207
+ "epoch": 0.9955156950672646,
20208
+ "grad_norm": 1.3856096267700195,
20209
+ "learning_rate": 5.3224667438123155e-09,
20210
+ "loss": 9.4516,
20211
+ "step": 2886
20212
+ },
20213
+ {
20214
+ "epoch": 0.9958606416005519,
20215
+ "grad_norm": 1.391958475112915,
20216
+ "learning_rate": 4.53513149500373e-09,
20217
+ "loss": 9.4719,
20218
+ "step": 2887
20219
+ },
20220
+ {
20221
+ "epoch": 0.9962055881338393,
20222
+ "grad_norm": 1.5442167520523071,
20223
+ "learning_rate": 3.810779415974741e-09,
20224
+ "loss": 9.4641,
20225
+ "step": 2888
20226
+ },
20227
+ {
20228
+ "epoch": 0.9965505346671266,
20229
+ "grad_norm": 1.4803158044815063,
20230
+ "learning_rate": 3.1494114192509585e-09,
20231
+ "loss": 9.4186,
20232
+ "step": 2889
20233
+ },
20234
+ {
20235
+ "epoch": 0.9968954812004139,
20236
+ "grad_norm": 1.4997535943984985,
20237
+ "learning_rate": 2.5510283379992504e-09,
20238
+ "loss": 9.4915,
20239
+ "step": 2890
20240
+ },
20241
+ {
20242
+ "epoch": 0.9972404277337013,
20243
+ "grad_norm": 1.5729633569717407,
20244
+ "learning_rate": 2.0156309260610517e-09,
20245
+ "loss": 9.423,
20246
+ "step": 2891
20247
+ },
20248
+ {
20249
+ "epoch": 0.9975853742669886,
20250
+ "grad_norm": 1.605699062347412,
20251
+ "learning_rate": 1.5432198579079516e-09,
20252
+ "loss": 9.4781,
20253
+ "step": 2892
20254
+ },
20255
+ {
20256
+ "epoch": 0.997930320800276,
20257
+ "grad_norm": 1.4182220697402954,
20258
+ "learning_rate": 1.1337957286805534e-09,
20259
+ "loss": 9.453,
20260
+ "step": 2893
20261
+ },
20262
+ {
20263
+ "epoch": 0.9982752673335633,
20264
+ "grad_norm": 1.5460253953933716,
20265
+ "learning_rate": 7.873590541551679e-10,
20266
+ "loss": 9.5107,
20267
+ "step": 2894
20268
+ },
20269
+ {
20270
+ "epoch": 0.9986202138668506,
20271
+ "grad_norm": 1.456726312637329,
20272
+ "learning_rate": 5.039102707715682e-10,
20273
+ "loss": 9.3826,
20274
+ "step": 2895
20275
+ },
20276
+ {
20277
+ "epoch": 0.9989651604001379,
20278
+ "grad_norm": 1.55360746383667,
20279
+ "learning_rate": 2.8344973560523456e-10,
20280
+ "loss": 9.394,
20281
+ "step": 2896
20282
+ },
20283
+ {
20284
+ "epoch": 0.9993101069334254,
20285
+ "grad_norm": 1.6202263832092285,
20286
+ "learning_rate": 1.2597772639511006e-10,
20287
+ "loss": 9.4164,
20288
+ "step": 2897
20289
+ },
20290
+ {
20291
+ "epoch": 0.9996550534667127,
20292
+ "grad_norm": 1.6138488054275513,
20293
+ "learning_rate": 3.1494441515844684e-11,
20294
+ "loss": 9.3771,
20295
+ "step": 2898
20296
+ },
20297
+ {
20298
+ "epoch": 1.0,
20299
+ "grad_norm": 1.670608639717102,
20300
+ "learning_rate": 0.0,
20301
+ "loss": 9.3193,
20302
+ "step": 2899
20303
  }
20304
  ],
20305
  "logging_steps": 1,
 
20314
  "should_evaluate": false,
20315
  "should_log": false,
20316
  "should_save": true,
20317
+ "should_training_stop": true
20318
  },
20319
  "attributes": {}
20320
  }
20321
  },
20322
+ "total_flos": 601758459101184.0,
20323
  "train_batch_size": 4,
20324
  "trial_name": null,
20325
  "trial_params": null