Training in progress, step 2899, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 2145944
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c6ec0ef0bd58107442e53f9af82589272dd28dae4b201ef6aa9c49f0c83bfc1c
|
3 |
size 2145944
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4310020
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e9965916e24cdc2427889fb3540f08506a8da344166dce118b8ba199ea51941c
|
3 |
size 4310020
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4242e9d0ac065fef7739bb68fca0ee428f925673e7726dd722595d4147173376
|
3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a35fbe8edc635ebc2790ec5a9bb024f4b9252d94d57cb9bc113900a36047bd39
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0
|
5 |
"eval_steps": 500,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -20083,6 +20083,223 @@
|
|
20083 |
"learning_rate": 3.0263108131095566e-08,
|
20084 |
"loss": 9.5316,
|
20085 |
"step": 2868
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
20086 |
}
|
20087 |
],
|
20088 |
"logging_steps": 1,
|
@@ -20097,12 +20314,12 @@
|
|
20097 |
"should_evaluate": false,
|
20098 |
"should_log": false,
|
20099 |
"should_save": true,
|
20100 |
-
"should_training_stop":
|
20101 |
},
|
20102 |
"attributes": {}
|
20103 |
}
|
20104 |
},
|
20105 |
-
"total_flos":
|
20106 |
"train_batch_size": 4,
|
20107 |
"trial_name": null,
|
20108 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 1.0,
|
5 |
"eval_steps": 500,
|
6 |
+
"global_step": 2899,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
20083 |
"learning_rate": 3.0263108131095566e-08,
|
20084 |
"loss": 9.5316,
|
20085 |
"step": 2868
|
20086 |
+
},
|
20087 |
+
{
|
20088 |
+
"epoch": 0.9896516040013797,
|
20089 |
+
"grad_norm": 1.1785590648651123,
|
20090 |
+
"learning_rate": 2.8342322313085202e-08,
|
20091 |
+
"loss": 9.4722,
|
20092 |
+
"step": 2869
|
20093 |
+
},
|
20094 |
+
{
|
20095 |
+
"epoch": 0.9899965505346672,
|
20096 |
+
"grad_norm": 1.2561894655227661,
|
20097 |
+
"learning_rate": 2.6484489673084257e-08,
|
20098 |
+
"loss": 9.5575,
|
20099 |
+
"step": 2870
|
20100 |
+
},
|
20101 |
+
{
|
20102 |
+
"epoch": 0.9903414970679545,
|
20103 |
+
"grad_norm": 1.1631916761398315,
|
20104 |
+
"learning_rate": 2.4689612551553888e-08,
|
20105 |
+
"loss": 9.5365,
|
20106 |
+
"step": 2871
|
20107 |
+
},
|
20108 |
+
{
|
20109 |
+
"epoch": 0.9906864436012418,
|
20110 |
+
"grad_norm": 1.2572925090789795,
|
20111 |
+
"learning_rate": 2.2957693209635368e-08,
|
20112 |
+
"loss": 9.5596,
|
20113 |
+
"step": 2872
|
20114 |
+
},
|
20115 |
+
{
|
20116 |
+
"epoch": 0.9910313901345291,
|
20117 |
+
"grad_norm": 1.324566125869751,
|
20118 |
+
"learning_rate": 2.1288733829161188e-08,
|
20119 |
+
"loss": 9.4521,
|
20120 |
+
"step": 2873
|
20121 |
+
},
|
20122 |
+
{
|
20123 |
+
"epoch": 0.9913763366678164,
|
20124 |
+
"grad_norm": 1.3486089706420898,
|
20125 |
+
"learning_rate": 1.9682736512660617e-08,
|
20126 |
+
"loss": 9.4974,
|
20127 |
+
"step": 2874
|
20128 |
+
},
|
20129 |
+
{
|
20130 |
+
"epoch": 0.9917212832011039,
|
20131 |
+
"grad_norm": 1.174851417541504,
|
20132 |
+
"learning_rate": 1.813970328331527e-08,
|
20133 |
+
"loss": 9.5033,
|
20134 |
+
"step": 2875
|
20135 |
+
},
|
20136 |
+
{
|
20137 |
+
"epoch": 0.9920662297343912,
|
20138 |
+
"grad_norm": 1.2707513570785522,
|
20139 |
+
"learning_rate": 1.6659636085020192e-08,
|
20140 |
+
"loss": 9.4723,
|
20141 |
+
"step": 2876
|
20142 |
+
},
|
20143 |
+
{
|
20144 |
+
"epoch": 0.9924111762676785,
|
20145 |
+
"grad_norm": 1.474165678024292,
|
20146 |
+
"learning_rate": 1.5242536782317242e-08,
|
20147 |
+
"loss": 9.4423,
|
20148 |
+
"step": 2877
|
20149 |
+
},
|
20150 |
+
{
|
20151 |
+
"epoch": 0.9927561228009658,
|
20152 |
+
"grad_norm": 1.3797553777694702,
|
20153 |
+
"learning_rate": 1.388840716045059e-08,
|
20154 |
+
"loss": 9.4966,
|
20155 |
+
"step": 2878
|
20156 |
+
},
|
20157 |
+
{
|
20158 |
+
"epoch": 0.9931010693342532,
|
20159 |
+
"grad_norm": 1.326752781867981,
|
20160 |
+
"learning_rate": 1.2597248925311222e-08,
|
20161 |
+
"loss": 9.5313,
|
20162 |
+
"step": 2879
|
20163 |
+
},
|
20164 |
+
{
|
20165 |
+
"epoch": 0.9934460158675406,
|
20166 |
+
"grad_norm": 1.3259997367858887,
|
20167 |
+
"learning_rate": 1.1369063703475791e-08,
|
20168 |
+
"loss": 9.4992,
|
20169 |
+
"step": 2880
|
20170 |
+
},
|
20171 |
+
{
|
20172 |
+
"epoch": 0.9937909624008279,
|
20173 |
+
"grad_norm": 1.3481284379959106,
|
20174 |
+
"learning_rate": 1.0203853042184407e-08,
|
20175 |
+
"loss": 9.4431,
|
20176 |
+
"step": 2881
|
20177 |
+
},
|
20178 |
+
{
|
20179 |
+
"epoch": 0.9941359089341152,
|
20180 |
+
"grad_norm": 1.3297460079193115,
|
20181 |
+
"learning_rate": 9.101618409340651e-09,
|
20182 |
+
"loss": 9.4599,
|
20183 |
+
"step": 2882
|
20184 |
+
},
|
20185 |
+
{
|
20186 |
+
"epoch": 0.9944808554674025,
|
20187 |
+
"grad_norm": 1.3250812292099,
|
20188 |
+
"learning_rate": 8.06236119351711e-09,
|
20189 |
+
"loss": 9.4787,
|
20190 |
+
"step": 2883
|
20191 |
+
},
|
20192 |
+
{
|
20193 |
+
"epoch": 0.9948258020006899,
|
20194 |
+
"grad_norm": 1.3159713745117188,
|
20195 |
+
"learning_rate": 7.086082703949837e-09,
|
20196 |
+
"loss": 9.4653,
|
20197 |
+
"step": 2884
|
20198 |
+
},
|
20199 |
+
{
|
20200 |
+
"epoch": 0.9951707485339772,
|
20201 |
+
"grad_norm": 1.3725025653839111,
|
20202 |
+
"learning_rate": 6.172784170532797e-09,
|
20203 |
+
"loss": 9.5064,
|
20204 |
+
"step": 2885
|
20205 |
+
},
|
20206 |
+
{
|
20207 |
+
"epoch": 0.9955156950672646,
|
20208 |
+
"grad_norm": 1.3856096267700195,
|
20209 |
+
"learning_rate": 5.3224667438123155e-09,
|
20210 |
+
"loss": 9.4516,
|
20211 |
+
"step": 2886
|
20212 |
+
},
|
20213 |
+
{
|
20214 |
+
"epoch": 0.9958606416005519,
|
20215 |
+
"grad_norm": 1.391958475112915,
|
20216 |
+
"learning_rate": 4.53513149500373e-09,
|
20217 |
+
"loss": 9.4719,
|
20218 |
+
"step": 2887
|
20219 |
+
},
|
20220 |
+
{
|
20221 |
+
"epoch": 0.9962055881338393,
|
20222 |
+
"grad_norm": 1.5442167520523071,
|
20223 |
+
"learning_rate": 3.810779415974741e-09,
|
20224 |
+
"loss": 9.4641,
|
20225 |
+
"step": 2888
|
20226 |
+
},
|
20227 |
+
{
|
20228 |
+
"epoch": 0.9965505346671266,
|
20229 |
+
"grad_norm": 1.4803158044815063,
|
20230 |
+
"learning_rate": 3.1494114192509585e-09,
|
20231 |
+
"loss": 9.4186,
|
20232 |
+
"step": 2889
|
20233 |
+
},
|
20234 |
+
{
|
20235 |
+
"epoch": 0.9968954812004139,
|
20236 |
+
"grad_norm": 1.4997535943984985,
|
20237 |
+
"learning_rate": 2.5510283379992504e-09,
|
20238 |
+
"loss": 9.4915,
|
20239 |
+
"step": 2890
|
20240 |
+
},
|
20241 |
+
{
|
20242 |
+
"epoch": 0.9972404277337013,
|
20243 |
+
"grad_norm": 1.5729633569717407,
|
20244 |
+
"learning_rate": 2.0156309260610517e-09,
|
20245 |
+
"loss": 9.423,
|
20246 |
+
"step": 2891
|
20247 |
+
},
|
20248 |
+
{
|
20249 |
+
"epoch": 0.9975853742669886,
|
20250 |
+
"grad_norm": 1.605699062347412,
|
20251 |
+
"learning_rate": 1.5432198579079516e-09,
|
20252 |
+
"loss": 9.4781,
|
20253 |
+
"step": 2892
|
20254 |
+
},
|
20255 |
+
{
|
20256 |
+
"epoch": 0.997930320800276,
|
20257 |
+
"grad_norm": 1.4182220697402954,
|
20258 |
+
"learning_rate": 1.1337957286805534e-09,
|
20259 |
+
"loss": 9.453,
|
20260 |
+
"step": 2893
|
20261 |
+
},
|
20262 |
+
{
|
20263 |
+
"epoch": 0.9982752673335633,
|
20264 |
+
"grad_norm": 1.5460253953933716,
|
20265 |
+
"learning_rate": 7.873590541551679e-10,
|
20266 |
+
"loss": 9.5107,
|
20267 |
+
"step": 2894
|
20268 |
+
},
|
20269 |
+
{
|
20270 |
+
"epoch": 0.9986202138668506,
|
20271 |
+
"grad_norm": 1.456726312637329,
|
20272 |
+
"learning_rate": 5.039102707715682e-10,
|
20273 |
+
"loss": 9.3826,
|
20274 |
+
"step": 2895
|
20275 |
+
},
|
20276 |
+
{
|
20277 |
+
"epoch": 0.9989651604001379,
|
20278 |
+
"grad_norm": 1.55360746383667,
|
20279 |
+
"learning_rate": 2.8344973560523456e-10,
|
20280 |
+
"loss": 9.394,
|
20281 |
+
"step": 2896
|
20282 |
+
},
|
20283 |
+
{
|
20284 |
+
"epoch": 0.9993101069334254,
|
20285 |
+
"grad_norm": 1.6202263832092285,
|
20286 |
+
"learning_rate": 1.2597772639511006e-10,
|
20287 |
+
"loss": 9.4164,
|
20288 |
+
"step": 2897
|
20289 |
+
},
|
20290 |
+
{
|
20291 |
+
"epoch": 0.9996550534667127,
|
20292 |
+
"grad_norm": 1.6138488054275513,
|
20293 |
+
"learning_rate": 3.1494441515844684e-11,
|
20294 |
+
"loss": 9.3771,
|
20295 |
+
"step": 2898
|
20296 |
+
},
|
20297 |
+
{
|
20298 |
+
"epoch": 1.0,
|
20299 |
+
"grad_norm": 1.670608639717102,
|
20300 |
+
"learning_rate": 0.0,
|
20301 |
+
"loss": 9.3193,
|
20302 |
+
"step": 2899
|
20303 |
}
|
20304 |
],
|
20305 |
"logging_steps": 1,
|
|
|
20314 |
"should_evaluate": false,
|
20315 |
"should_log": false,
|
20316 |
"should_save": true,
|
20317 |
+
"should_training_stop": true
|
20318 |
},
|
20319 |
"attributes": {}
|
20320 |
}
|
20321 |
},
|
20322 |
+
"total_flos": 601758459101184.0,
|
20323 |
"train_batch_size": 4,
|
20324 |
"trial_name": null,
|
20325 |
"trial_params": null
|