Training in progress, step 5945, checkpoint
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/rng_state.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +1467 -4
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 516802328
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:70c3b19bc4eb8be76e3f5a614ad9651dac58cc5c1cf201702fc3087309d23ca7
|
3 |
size 516802328
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3662bc0e65237626b783f1e110f32d0f58b50371353139f3380aa21907d63486
|
3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ee9c30235a00d4e956792d86eabe739f21989b7ae3dfec94f60c7acfc17fd683
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 500,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -40159,6 +40159,1469 @@
|
|
40159 |
"learning_rate": 3.151421924994513e-07,
|
40160 |
"loss": 0.0,
|
40161 |
"step": 5736
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
40162 |
}
|
40163 |
],
|
40164 |
"logging_steps": 1,
|
@@ -40173,12 +41636,12 @@
|
|
40173 |
"should_evaluate": false,
|
40174 |
"should_log": false,
|
40175 |
"should_save": true,
|
40176 |
-
"should_training_stop":
|
40177 |
},
|
40178 |
"attributes": {}
|
40179 |
}
|
40180 |
},
|
40181 |
-
"total_flos": 1.
|
40182 |
"train_batch_size": 8,
|
40183 |
"trial_name": null,
|
40184 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.9999159027836179,
|
5 |
"eval_steps": 500,
|
6 |
+
"global_step": 5945,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
40159 |
"learning_rate": 3.151421924994513e-07,
|
40160 |
"loss": 0.0,
|
40161 |
"step": 5736
|
40162 |
+
},
|
40163 |
+
{
|
40164 |
+
"epoch": 0.9649314607686486,
|
40165 |
+
"grad_norm": NaN,
|
40166 |
+
"learning_rate": 3.121368265136704e-07,
|
40167 |
+
"loss": 0.0,
|
40168 |
+
"step": 5737
|
40169 |
+
},
|
40170 |
+
{
|
40171 |
+
"epoch": 0.9650996552014128,
|
40172 |
+
"grad_norm": NaN,
|
40173 |
+
"learning_rate": 3.091458147950255e-07,
|
40174 |
+
"loss": 0.0,
|
40175 |
+
"step": 5738
|
40176 |
+
},
|
40177 |
+
{
|
40178 |
+
"epoch": 0.9652678496341771,
|
40179 |
+
"grad_norm": NaN,
|
40180 |
+
"learning_rate": 3.0616915820758095e-07,
|
40181 |
+
"loss": 0.0,
|
40182 |
+
"step": 5739
|
40183 |
+
},
|
40184 |
+
{
|
40185 |
+
"epoch": 0.9654360440669414,
|
40186 |
+
"grad_norm": NaN,
|
40187 |
+
"learning_rate": 3.0320685761127123e-07,
|
40188 |
+
"loss": 0.0,
|
40189 |
+
"step": 5740
|
40190 |
+
},
|
40191 |
+
{
|
40192 |
+
"epoch": 0.9656042384997057,
|
40193 |
+
"grad_norm": NaN,
|
40194 |
+
"learning_rate": 3.002589138618561e-07,
|
40195 |
+
"loss": 0.0,
|
40196 |
+
"step": 5741
|
40197 |
+
},
|
40198 |
+
{
|
40199 |
+
"epoch": 0.96577243293247,
|
40200 |
+
"grad_norm": NaN,
|
40201 |
+
"learning_rate": 2.973253278109767e-07,
|
40202 |
+
"loss": 0.0,
|
40203 |
+
"step": 5742
|
40204 |
+
},
|
40205 |
+
{
|
40206 |
+
"epoch": 0.9659406273652342,
|
40207 |
+
"grad_norm": NaN,
|
40208 |
+
"learning_rate": 2.9440610030610494e-07,
|
40209 |
+
"loss": 0.0,
|
40210 |
+
"step": 5743
|
40211 |
+
},
|
40212 |
+
{
|
40213 |
+
"epoch": 0.9661088217979985,
|
40214 |
+
"grad_norm": NaN,
|
40215 |
+
"learning_rate": 2.91501232190583e-07,
|
40216 |
+
"loss": 0.0,
|
40217 |
+
"step": 5744
|
40218 |
+
},
|
40219 |
+
{
|
40220 |
+
"epoch": 0.9662770162307628,
|
40221 |
+
"grad_norm": NaN,
|
40222 |
+
"learning_rate": 2.8861072430358404e-07,
|
40223 |
+
"loss": 0.0,
|
40224 |
+
"step": 5745
|
40225 |
+
},
|
40226 |
+
{
|
40227 |
+
"epoch": 0.9664452106635271,
|
40228 |
+
"grad_norm": NaN,
|
40229 |
+
"learning_rate": 2.8573457748014564e-07,
|
40230 |
+
"loss": 0.0,
|
40231 |
+
"step": 5746
|
40232 |
+
},
|
40233 |
+
{
|
40234 |
+
"epoch": 0.9666134050962913,
|
40235 |
+
"grad_norm": NaN,
|
40236 |
+
"learning_rate": 2.8287279255115873e-07,
|
40237 |
+
"loss": 0.0,
|
40238 |
+
"step": 5747
|
40239 |
+
},
|
40240 |
+
{
|
40241 |
+
"epoch": 0.9667815995290556,
|
40242 |
+
"grad_norm": NaN,
|
40243 |
+
"learning_rate": 2.800253703433564e-07,
|
40244 |
+
"loss": 0.0,
|
40245 |
+
"step": 5748
|
40246 |
+
},
|
40247 |
+
{
|
40248 |
+
"epoch": 0.9669497939618199,
|
40249 |
+
"grad_norm": NaN,
|
40250 |
+
"learning_rate": 2.7719231167933067e-07,
|
40251 |
+
"loss": 0.0,
|
40252 |
+
"step": 5749
|
40253 |
+
},
|
40254 |
+
{
|
40255 |
+
"epoch": 0.9671179883945842,
|
40256 |
+
"grad_norm": NaN,
|
40257 |
+
"learning_rate": 2.743736173775213e-07,
|
40258 |
+
"loss": 0.0,
|
40259 |
+
"step": 5750
|
40260 |
+
},
|
40261 |
+
{
|
40262 |
+
"epoch": 0.9672861828273485,
|
40263 |
+
"grad_norm": NaN,
|
40264 |
+
"learning_rate": 2.715692882522103e-07,
|
40265 |
+
"loss": 0.0,
|
40266 |
+
"step": 5751
|
40267 |
+
},
|
40268 |
+
{
|
40269 |
+
"epoch": 0.9674543772601127,
|
40270 |
+
"grad_norm": NaN,
|
40271 |
+
"learning_rate": 2.687793251135384e-07,
|
40272 |
+
"loss": 0.0,
|
40273 |
+
"step": 5752
|
40274 |
+
},
|
40275 |
+
{
|
40276 |
+
"epoch": 0.967622571692877,
|
40277 |
+
"grad_norm": NaN,
|
40278 |
+
"learning_rate": 2.6600372876750544e-07,
|
40279 |
+
"loss": 0.0,
|
40280 |
+
"step": 5753
|
40281 |
+
},
|
40282 |
+
{
|
40283 |
+
"epoch": 0.9677907661256412,
|
40284 |
+
"grad_norm": NaN,
|
40285 |
+
"learning_rate": 2.6324250001593664e-07,
|
40286 |
+
"loss": 0.0,
|
40287 |
+
"step": 5754
|
40288 |
+
},
|
40289 |
+
{
|
40290 |
+
"epoch": 0.9679589605584055,
|
40291 |
+
"grad_norm": NaN,
|
40292 |
+
"learning_rate": 2.604956396565328e-07,
|
40293 |
+
"loss": 0.0,
|
40294 |
+
"step": 5755
|
40295 |
+
},
|
40296 |
+
{
|
40297 |
+
"epoch": 0.9681271549911697,
|
40298 |
+
"grad_norm": NaN,
|
40299 |
+
"learning_rate": 2.577631484828147e-07,
|
40300 |
+
"loss": 0.0,
|
40301 |
+
"step": 5756
|
40302 |
+
},
|
40303 |
+
{
|
40304 |
+
"epoch": 0.968295349423934,
|
40305 |
+
"grad_norm": NaN,
|
40306 |
+
"learning_rate": 2.550450272841842e-07,
|
40307 |
+
"loss": 0.0,
|
40308 |
+
"step": 5757
|
40309 |
+
},
|
40310 |
+
{
|
40311 |
+
"epoch": 0.9684635438566983,
|
40312 |
+
"grad_norm": NaN,
|
40313 |
+
"learning_rate": 2.523412768458688e-07,
|
40314 |
+
"loss": 0.0,
|
40315 |
+
"step": 5758
|
40316 |
+
},
|
40317 |
+
{
|
40318 |
+
"epoch": 0.9686317382894626,
|
40319 |
+
"grad_norm": NaN,
|
40320 |
+
"learning_rate": 2.4965189794895485e-07,
|
40321 |
+
"loss": 0.0,
|
40322 |
+
"step": 5759
|
40323 |
+
},
|
40324 |
+
{
|
40325 |
+
"epoch": 0.9687999327222269,
|
40326 |
+
"grad_norm": NaN,
|
40327 |
+
"learning_rate": 2.469768913703707e-07,
|
40328 |
+
"loss": 0.0,
|
40329 |
+
"step": 5760
|
40330 |
+
},
|
40331 |
+
{
|
40332 |
+
"epoch": 0.9689681271549911,
|
40333 |
+
"grad_norm": NaN,
|
40334 |
+
"learning_rate": 2.443162578828928e-07,
|
40335 |
+
"loss": 0.0,
|
40336 |
+
"step": 5761
|
40337 |
+
},
|
40338 |
+
{
|
40339 |
+
"epoch": 0.9691363215877554,
|
40340 |
+
"grad_norm": NaN,
|
40341 |
+
"learning_rate": 2.4166999825515625e-07,
|
40342 |
+
"loss": 0.0,
|
40343 |
+
"step": 5762
|
40344 |
+
},
|
40345 |
+
{
|
40346 |
+
"epoch": 0.9693045160205197,
|
40347 |
+
"grad_norm": NaN,
|
40348 |
+
"learning_rate": 2.3903811325163283e-07,
|
40349 |
+
"loss": 0.0,
|
40350 |
+
"step": 5763
|
40351 |
+
},
|
40352 |
+
{
|
40353 |
+
"epoch": 0.969472710453284,
|
40354 |
+
"grad_norm": NaN,
|
40355 |
+
"learning_rate": 2.3642060363264217e-07,
|
40356 |
+
"loss": 0.0,
|
40357 |
+
"step": 5764
|
40358 |
+
},
|
40359 |
+
{
|
40360 |
+
"epoch": 0.9696409048860483,
|
40361 |
+
"grad_norm": NaN,
|
40362 |
+
"learning_rate": 2.3381747015435163e-07,
|
40363 |
+
"loss": 0.0,
|
40364 |
+
"step": 5765
|
40365 |
+
},
|
40366 |
+
{
|
40367 |
+
"epoch": 0.9698090993188125,
|
40368 |
+
"grad_norm": NaN,
|
40369 |
+
"learning_rate": 2.312287135687874e-07,
|
40370 |
+
"loss": 0.0,
|
40371 |
+
"step": 5766
|
40372 |
+
},
|
40373 |
+
{
|
40374 |
+
"epoch": 0.9699772937515768,
|
40375 |
+
"grad_norm": NaN,
|
40376 |
+
"learning_rate": 2.2865433462380125e-07,
|
40377 |
+
"loss": 0.0,
|
40378 |
+
"step": 5767
|
40379 |
+
},
|
40380 |
+
{
|
40381 |
+
"epoch": 0.9701454881843411,
|
40382 |
+
"grad_norm": NaN,
|
40383 |
+
"learning_rate": 2.2609433406310941e-07,
|
40384 |
+
"loss": 0.0,
|
40385 |
+
"step": 5768
|
40386 |
+
},
|
40387 |
+
{
|
40388 |
+
"epoch": 0.9703136826171054,
|
40389 |
+
"grad_norm": NaN,
|
40390 |
+
"learning_rate": 2.2354871262626477e-07,
|
40391 |
+
"loss": 0.0,
|
40392 |
+
"step": 5769
|
40393 |
+
},
|
40394 |
+
{
|
40395 |
+
"epoch": 0.9704818770498697,
|
40396 |
+
"grad_norm": NaN,
|
40397 |
+
"learning_rate": 2.2101747104866788e-07,
|
40398 |
+
"loss": 0.0,
|
40399 |
+
"step": 5770
|
40400 |
+
},
|
40401 |
+
{
|
40402 |
+
"epoch": 0.9706500714826339,
|
40403 |
+
"grad_norm": NaN,
|
40404 |
+
"learning_rate": 2.185006100615672e-07,
|
40405 |
+
"loss": 0.0,
|
40406 |
+
"step": 5771
|
40407 |
+
},
|
40408 |
+
{
|
40409 |
+
"epoch": 0.9708182659153982,
|
40410 |
+
"grad_norm": NaN,
|
40411 |
+
"learning_rate": 2.159981303920533e-07,
|
40412 |
+
"loss": 0.0,
|
40413 |
+
"step": 5772
|
40414 |
+
},
|
40415 |
+
{
|
40416 |
+
"epoch": 0.9709864603481625,
|
40417 |
+
"grad_norm": NaN,
|
40418 |
+
"learning_rate": 2.1351003276307014e-07,
|
40419 |
+
"loss": 0.0,
|
40420 |
+
"step": 5773
|
40421 |
+
},
|
40422 |
+
{
|
40423 |
+
"epoch": 0.9711546547809268,
|
40424 |
+
"grad_norm": NaN,
|
40425 |
+
"learning_rate": 2.1103631789339272e-07,
|
40426 |
+
"loss": 0.0,
|
40427 |
+
"step": 5774
|
40428 |
+
},
|
40429 |
+
{
|
40430 |
+
"epoch": 0.971322849213691,
|
40431 |
+
"grad_norm": NaN,
|
40432 |
+
"learning_rate": 2.0857698649766055e-07,
|
40433 |
+
"loss": 0.0,
|
40434 |
+
"step": 5775
|
40435 |
+
},
|
40436 |
+
{
|
40437 |
+
"epoch": 0.9714910436464553,
|
40438 |
+
"grad_norm": NaN,
|
40439 |
+
"learning_rate": 2.061320392863386e-07,
|
40440 |
+
"loss": 0.0,
|
40441 |
+
"step": 5776
|
40442 |
+
},
|
40443 |
+
{
|
40444 |
+
"epoch": 0.9716592380792196,
|
40445 |
+
"grad_norm": NaN,
|
40446 |
+
"learning_rate": 2.0370147696574526e-07,
|
40447 |
+
"loss": 0.0,
|
40448 |
+
"step": 5777
|
40449 |
+
},
|
40450 |
+
{
|
40451 |
+
"epoch": 0.9718274325119839,
|
40452 |
+
"grad_norm": NaN,
|
40453 |
+
"learning_rate": 2.012853002380466e-07,
|
40454 |
+
"loss": 0.0,
|
40455 |
+
"step": 5778
|
40456 |
+
},
|
40457 |
+
{
|
40458 |
+
"epoch": 0.9719956269447482,
|
40459 |
+
"grad_norm": NaN,
|
40460 |
+
"learning_rate": 1.988835098012509e-07,
|
40461 |
+
"loss": 0.0,
|
40462 |
+
"step": 5779
|
40463 |
+
},
|
40464 |
+
{
|
40465 |
+
"epoch": 0.9721638213775124,
|
40466 |
+
"grad_norm": NaN,
|
40467 |
+
"learning_rate": 1.9649610634919767e-07,
|
40468 |
+
"loss": 0.0,
|
40469 |
+
"step": 5780
|
40470 |
+
},
|
40471 |
+
{
|
40472 |
+
"epoch": 0.9723320158102767,
|
40473 |
+
"grad_norm": NaN,
|
40474 |
+
"learning_rate": 1.9412309057159073e-07,
|
40475 |
+
"loss": 0.0,
|
40476 |
+
"step": 5781
|
40477 |
+
},
|
40478 |
+
{
|
40479 |
+
"epoch": 0.972500210243041,
|
40480 |
+
"grad_norm": NaN,
|
40481 |
+
"learning_rate": 1.9176446315397056e-07,
|
40482 |
+
"loss": 0.0,
|
40483 |
+
"step": 5782
|
40484 |
+
},
|
40485 |
+
{
|
40486 |
+
"epoch": 0.9726684046758052,
|
40487 |
+
"grad_norm": NaN,
|
40488 |
+
"learning_rate": 1.894202247777088e-07,
|
40489 |
+
"loss": 0.0,
|
40490 |
+
"step": 5783
|
40491 |
+
},
|
40492 |
+
{
|
40493 |
+
"epoch": 0.9728365991085695,
|
40494 |
+
"grad_norm": NaN,
|
40495 |
+
"learning_rate": 1.8709037612003045e-07,
|
40496 |
+
"loss": 0.0,
|
40497 |
+
"step": 5784
|
40498 |
+
},
|
40499 |
+
{
|
40500 |
+
"epoch": 0.9730047935413337,
|
40501 |
+
"grad_norm": NaN,
|
40502 |
+
"learning_rate": 1.8477491785400813e-07,
|
40503 |
+
"loss": 0.0,
|
40504 |
+
"step": 5785
|
40505 |
+
},
|
40506 |
+
{
|
40507 |
+
"epoch": 0.973172987974098,
|
40508 |
+
"grad_norm": NaN,
|
40509 |
+
"learning_rate": 1.8247385064855127e-07,
|
40510 |
+
"loss": 0.0,
|
40511 |
+
"step": 5786
|
40512 |
+
},
|
40513 |
+
{
|
40514 |
+
"epoch": 0.9733411824068623,
|
40515 |
+
"grad_norm": NaN,
|
40516 |
+
"learning_rate": 1.8018717516841143e-07,
|
40517 |
+
"loss": 0.0,
|
40518 |
+
"step": 5787
|
40519 |
+
},
|
40520 |
+
{
|
40521 |
+
"epoch": 0.9735093768396266,
|
40522 |
+
"grad_norm": NaN,
|
40523 |
+
"learning_rate": 1.779148920741769e-07,
|
40524 |
+
"loss": 0.0,
|
40525 |
+
"step": 5788
|
40526 |
+
},
|
40527 |
+
{
|
40528 |
+
"epoch": 0.9736775712723909,
|
40529 |
+
"grad_norm": NaN,
|
40530 |
+
"learning_rate": 1.7565700202229473e-07,
|
40531 |
+
"loss": 0.0,
|
40532 |
+
"step": 5789
|
40533 |
+
},
|
40534 |
+
{
|
40535 |
+
"epoch": 0.9738457657051551,
|
40536 |
+
"grad_norm": NaN,
|
40537 |
+
"learning_rate": 1.7341350566504323e-07,
|
40538 |
+
"loss": 0.0,
|
40539 |
+
"step": 5790
|
40540 |
+
},
|
40541 |
+
{
|
40542 |
+
"epoch": 0.9740139601379194,
|
40543 |
+
"grad_norm": NaN,
|
40544 |
+
"learning_rate": 1.7118440365053722e-07,
|
40545 |
+
"loss": 0.0,
|
40546 |
+
"step": 5791
|
40547 |
+
},
|
40548 |
+
{
|
40549 |
+
"epoch": 0.9741821545706837,
|
40550 |
+
"grad_norm": NaN,
|
40551 |
+
"learning_rate": 1.6896969662273944e-07,
|
40552 |
+
"loss": 0.0,
|
40553 |
+
"step": 5792
|
40554 |
+
},
|
40555 |
+
{
|
40556 |
+
"epoch": 0.974350349003448,
|
40557 |
+
"grad_norm": NaN,
|
40558 |
+
"learning_rate": 1.6676938522146023e-07,
|
40559 |
+
"loss": 0.0,
|
40560 |
+
"step": 5793
|
40561 |
+
},
|
40562 |
+
{
|
40563 |
+
"epoch": 0.9745185434362122,
|
40564 |
+
"grad_norm": NaN,
|
40565 |
+
"learning_rate": 1.645834700823412e-07,
|
40566 |
+
"loss": 0.0,
|
40567 |
+
"step": 5794
|
40568 |
+
},
|
40569 |
+
{
|
40570 |
+
"epoch": 0.9746867378689765,
|
40571 |
+
"grad_norm": NaN,
|
40572 |
+
"learning_rate": 1.6241195183686608e-07,
|
40573 |
+
"loss": 0.0,
|
40574 |
+
"step": 5795
|
40575 |
+
},
|
40576 |
+
{
|
40577 |
+
"epoch": 0.9748549323017408,
|
40578 |
+
"grad_norm": NaN,
|
40579 |
+
"learning_rate": 1.6025483111236638e-07,
|
40580 |
+
"loss": 0.0,
|
40581 |
+
"step": 5796
|
40582 |
+
},
|
40583 |
+
{
|
40584 |
+
"epoch": 0.9750231267345051,
|
40585 |
+
"grad_norm": NaN,
|
40586 |
+
"learning_rate": 1.581121085320103e-07,
|
40587 |
+
"loss": 0.0,
|
40588 |
+
"step": 5797
|
40589 |
+
},
|
40590 |
+
{
|
40591 |
+
"epoch": 0.9751913211672694,
|
40592 |
+
"grad_norm": NaN,
|
40593 |
+
"learning_rate": 1.5598378471480267e-07,
|
40594 |
+
"loss": 0.0,
|
40595 |
+
"step": 5798
|
40596 |
+
},
|
40597 |
+
{
|
40598 |
+
"epoch": 0.9753595156000336,
|
40599 |
+
"grad_norm": NaN,
|
40600 |
+
"learning_rate": 1.5386986027559613e-07,
|
40601 |
+
"loss": 0.0,
|
40602 |
+
"step": 5799
|
40603 |
+
},
|
40604 |
+
{
|
40605 |
+
"epoch": 0.9755277100327979,
|
40606 |
+
"grad_norm": NaN,
|
40607 |
+
"learning_rate": 1.5177033582507993e-07,
|
40608 |
+
"loss": 0.0,
|
40609 |
+
"step": 5800
|
40610 |
+
},
|
40611 |
+
{
|
40612 |
+
"epoch": 0.9756959044655622,
|
40613 |
+
"grad_norm": NaN,
|
40614 |
+
"learning_rate": 1.4968521196978002e-07,
|
40615 |
+
"loss": 0.0,
|
40616 |
+
"step": 5801
|
40617 |
+
},
|
40618 |
+
{
|
40619 |
+
"epoch": 0.9758640988983265,
|
40620 |
+
"grad_norm": NaN,
|
40621 |
+
"learning_rate": 1.4761448931206455e-07,
|
40622 |
+
"loss": 0.0,
|
40623 |
+
"step": 5802
|
40624 |
+
},
|
40625 |
+
{
|
40626 |
+
"epoch": 0.9760322933310908,
|
40627 |
+
"grad_norm": NaN,
|
40628 |
+
"learning_rate": 1.4555816845014948e-07,
|
40629 |
+
"loss": 0.0,
|
40630 |
+
"step": 5803
|
40631 |
+
},
|
40632 |
+
{
|
40633 |
+
"epoch": 0.976200487763855,
|
40634 |
+
"grad_norm": NaN,
|
40635 |
+
"learning_rate": 1.4351624997807623e-07,
|
40636 |
+
"loss": 0.0,
|
40637 |
+
"step": 5804
|
40638 |
+
},
|
40639 |
+
{
|
40640 |
+
"epoch": 0.9763686821966193,
|
40641 |
+
"grad_norm": NaN,
|
40642 |
+
"learning_rate": 1.4148873448573408e-07,
|
40643 |
+
"loss": 0.0,
|
40644 |
+
"step": 5805
|
40645 |
+
},
|
40646 |
+
{
|
40647 |
+
"epoch": 0.9765368766293836,
|
40648 |
+
"grad_norm": NaN,
|
40649 |
+
"learning_rate": 1.3947562255884338e-07,
|
40650 |
+
"loss": 0.0,
|
40651 |
+
"step": 5806
|
40652 |
+
},
|
40653 |
+
{
|
40654 |
+
"epoch": 0.9767050710621479,
|
40655 |
+
"grad_norm": NaN,
|
40656 |
+
"learning_rate": 1.374769147789834e-07,
|
40657 |
+
"loss": 0.0,
|
40658 |
+
"step": 5807
|
40659 |
+
},
|
40660 |
+
{
|
40661 |
+
"epoch": 0.9768732654949122,
|
40662 |
+
"grad_norm": NaN,
|
40663 |
+
"learning_rate": 1.3549261172354777e-07,
|
40664 |
+
"loss": 0.0,
|
40665 |
+
"step": 5808
|
40666 |
+
},
|
40667 |
+
{
|
40668 |
+
"epoch": 0.9770414599276764,
|
40669 |
+
"grad_norm": NaN,
|
40670 |
+
"learning_rate": 1.3352271396577798e-07,
|
40671 |
+
"loss": 0.0,
|
40672 |
+
"step": 5809
|
40673 |
+
},
|
40674 |
+
{
|
40675 |
+
"epoch": 0.9772096543604407,
|
40676 |
+
"grad_norm": NaN,
|
40677 |
+
"learning_rate": 1.3156722207476324e-07,
|
40678 |
+
"loss": 0.0,
|
40679 |
+
"step": 5810
|
40680 |
+
},
|
40681 |
+
{
|
40682 |
+
"epoch": 0.977377848793205,
|
40683 |
+
"grad_norm": NaN,
|
40684 |
+
"learning_rate": 1.2962613661541834e-07,
|
40685 |
+
"loss": 0.0,
|
40686 |
+
"step": 5811
|
40687 |
+
},
|
40688 |
+
{
|
40689 |
+
"epoch": 0.9775460432259693,
|
40690 |
+
"grad_norm": NaN,
|
40691 |
+
"learning_rate": 1.2769945814850582e-07,
|
40692 |
+
"loss": 0.0,
|
40693 |
+
"step": 5812
|
40694 |
+
},
|
40695 |
+
{
|
40696 |
+
"epoch": 0.9777142376587334,
|
40697 |
+
"grad_norm": NaN,
|
40698 |
+
"learning_rate": 1.2578718723061378e-07,
|
40699 |
+
"loss": 0.0,
|
40700 |
+
"step": 5813
|
40701 |
+
},
|
40702 |
+
{
|
40703 |
+
"epoch": 0.9778824320914977,
|
40704 |
+
"grad_norm": NaN,
|
40705 |
+
"learning_rate": 1.2388932441418367e-07,
|
40706 |
+
"loss": 0.0,
|
40707 |
+
"step": 5814
|
40708 |
+
},
|
40709 |
+
{
|
40710 |
+
"epoch": 0.978050626524262,
|
40711 |
+
"grad_norm": NaN,
|
40712 |
+
"learning_rate": 1.220058702474769e-07,
|
40713 |
+
"loss": 0.0,
|
40714 |
+
"step": 5815
|
40715 |
+
},
|
40716 |
+
{
|
40717 |
+
"epoch": 0.9782188209570263,
|
40718 |
+
"grad_norm": NaN,
|
40719 |
+
"learning_rate": 1.2013682527461379e-07,
|
40720 |
+
"loss": 0.0,
|
40721 |
+
"step": 5816
|
40722 |
+
},
|
40723 |
+
{
|
40724 |
+
"epoch": 0.9783870153897906,
|
40725 |
+
"grad_norm": NaN,
|
40726 |
+
"learning_rate": 1.1828219003553465e-07,
|
40727 |
+
"loss": 0.0,
|
40728 |
+
"step": 5817
|
40729 |
+
},
|
40730 |
+
{
|
40731 |
+
"epoch": 0.9785552098225548,
|
40732 |
+
"grad_norm": NaN,
|
40733 |
+
"learning_rate": 1.16441965066022e-07,
|
40734 |
+
"loss": 0.0,
|
40735 |
+
"step": 5818
|
40736 |
+
},
|
40737 |
+
{
|
40738 |
+
"epoch": 0.9787234042553191,
|
40739 |
+
"grad_norm": NaN,
|
40740 |
+
"learning_rate": 1.1461615089770062e-07,
|
40741 |
+
"loss": 0.0,
|
40742 |
+
"step": 5819
|
40743 |
+
},
|
40744 |
+
{
|
40745 |
+
"epoch": 0.9788915986880834,
|
40746 |
+
"grad_norm": NaN,
|
40747 |
+
"learning_rate": 1.1280474805802632e-07,
|
40748 |
+
"loss": 0.0,
|
40749 |
+
"step": 5820
|
40750 |
+
},
|
40751 |
+
{
|
40752 |
+
"epoch": 0.9790597931208477,
|
40753 |
+
"grad_norm": NaN,
|
40754 |
+
"learning_rate": 1.110077570702861e-07,
|
40755 |
+
"loss": 0.0,
|
40756 |
+
"step": 5821
|
40757 |
+
},
|
40758 |
+
{
|
40759 |
+
"epoch": 0.979227987553612,
|
40760 |
+
"grad_norm": NaN,
|
40761 |
+
"learning_rate": 1.0922517845362023e-07,
|
40762 |
+
"loss": 0.0,
|
40763 |
+
"step": 5822
|
40764 |
+
},
|
40765 |
+
{
|
40766 |
+
"epoch": 0.9793961819863762,
|
40767 |
+
"grad_norm": NaN,
|
40768 |
+
"learning_rate": 1.0745701272298902e-07,
|
40769 |
+
"loss": 0.0,
|
40770 |
+
"step": 5823
|
40771 |
+
},
|
40772 |
+
{
|
40773 |
+
"epoch": 0.9795643764191405,
|
40774 |
+
"grad_norm": NaN,
|
40775 |
+
"learning_rate": 1.0570326038920053e-07,
|
40776 |
+
"loss": 0.0,
|
40777 |
+
"step": 5824
|
40778 |
+
},
|
40779 |
+
{
|
40780 |
+
"epoch": 0.9797325708519048,
|
40781 |
+
"grad_norm": NaN,
|
40782 |
+
"learning_rate": 1.0396392195889393e-07,
|
40783 |
+
"loss": 0.0,
|
40784 |
+
"step": 5825
|
40785 |
+
},
|
40786 |
+
{
|
40787 |
+
"epoch": 0.9799007652846691,
|
40788 |
+
"grad_norm": NaN,
|
40789 |
+
"learning_rate": 1.022389979345395e-07,
|
40790 |
+
"loss": 0.0,
|
40791 |
+
"step": 5826
|
40792 |
+
},
|
40793 |
+
{
|
40794 |
+
"epoch": 0.9800689597174334,
|
40795 |
+
"grad_norm": NaN,
|
40796 |
+
"learning_rate": 1.0052848881444976e-07,
|
40797 |
+
"loss": 0.0,
|
40798 |
+
"step": 5827
|
40799 |
+
},
|
40800 |
+
{
|
40801 |
+
"epoch": 0.9802371541501976,
|
40802 |
+
"grad_norm": NaN,
|
40803 |
+
"learning_rate": 9.883239509277942e-08,
|
40804 |
+
"loss": 0.0,
|
40805 |
+
"step": 5828
|
40806 |
+
},
|
40807 |
+
{
|
40808 |
+
"epoch": 0.9804053485829619,
|
40809 |
+
"grad_norm": NaN,
|
40810 |
+
"learning_rate": 9.715071725949765e-08,
|
40811 |
+
"loss": 0.0,
|
40812 |
+
"step": 5829
|
40813 |
+
},
|
40814 |
+
{
|
40815 |
+
"epoch": 0.9805735430157262,
|
40816 |
+
"grad_norm": NaN,
|
40817 |
+
"learning_rate": 9.54834558004325e-08,
|
40818 |
+
"loss": 0.0,
|
40819 |
+
"step": 5830
|
40820 |
+
},
|
40821 |
+
{
|
40822 |
+
"epoch": 0.9807417374484905,
|
40823 |
+
"grad_norm": NaN,
|
40824 |
+
"learning_rate": 9.383061119723757e-08,
|
40825 |
+
"loss": 0.0,
|
40826 |
+
"step": 5831
|
40827 |
+
},
|
40828 |
+
{
|
40829 |
+
"epoch": 0.9809099318812547,
|
40830 |
+
"grad_norm": NaN,
|
40831 |
+
"learning_rate": 9.219218392739759e-08,
|
40832 |
+
"loss": 0.0,
|
40833 |
+
"step": 5832
|
40834 |
+
},
|
40835 |
+
{
|
40836 |
+
"epoch": 0.981078126314019,
|
40837 |
+
"grad_norm": NaN,
|
40838 |
+
"learning_rate": 9.056817446422839e-08,
|
40839 |
+
"loss": 0.0,
|
40840 |
+
"step": 5833
|
40841 |
+
},
|
40842 |
+
{
|
40843 |
+
"epoch": 0.9812463207467833,
|
40844 |
+
"grad_norm": NaN,
|
40845 |
+
"learning_rate": 8.895858327690464e-08,
|
40846 |
+
"loss": 0.0,
|
40847 |
+
"step": 5834
|
40848 |
+
},
|
40849 |
+
{
|
40850 |
+
"epoch": 0.9814145151795476,
|
40851 |
+
"grad_norm": NaN,
|
40852 |
+
"learning_rate": 8.736341083041e-08,
|
40853 |
+
"loss": 0.0,
|
40854 |
+
"step": 5835
|
40855 |
+
},
|
40856 |
+
{
|
40857 |
+
"epoch": 0.9815827096123119,
|
40858 |
+
"grad_norm": NaN,
|
40859 |
+
"learning_rate": 8.578265758557024e-08,
|
40860 |
+
"loss": 0.0,
|
40861 |
+
"step": 5836
|
40862 |
+
},
|
40863 |
+
{
|
40864 |
+
"epoch": 0.9817509040450761,
|
40865 |
+
"grad_norm": NaN,
|
40866 |
+
"learning_rate": 8.421632399904788e-08,
|
40867 |
+
"loss": 0.0,
|
40868 |
+
"step": 5837
|
40869 |
+
},
|
40870 |
+
{
|
40871 |
+
"epoch": 0.9819190984778404,
|
40872 |
+
"grad_norm": NaN,
|
40873 |
+
"learning_rate": 8.266441052334206e-08,
|
40874 |
+
"loss": 0.0,
|
40875 |
+
"step": 5838
|
40876 |
+
},
|
40877 |
+
{
|
40878 |
+
"epoch": 0.9820872929106047,
|
40879 |
+
"grad_norm": NaN,
|
40880 |
+
"learning_rate": 8.112691760677749e-08,
|
40881 |
+
"loss": 0.0,
|
40882 |
+
"step": 5839
|
40883 |
+
},
|
40884 |
+
{
|
40885 |
+
"epoch": 0.982255487343369,
|
40886 |
+
"grad_norm": NaN,
|
40887 |
+
"learning_rate": 7.960384569353219e-08,
|
40888 |
+
"loss": 0.0,
|
40889 |
+
"step": 5840
|
40890 |
+
},
|
40891 |
+
{
|
40892 |
+
"epoch": 0.9824236817761333,
|
40893 |
+
"grad_norm": NaN,
|
40894 |
+
"learning_rate": 7.809519522358755e-08,
|
40895 |
+
"loss": 0.0,
|
40896 |
+
"step": 5841
|
40897 |
+
},
|
40898 |
+
{
|
40899 |
+
"epoch": 0.9825918762088974,
|
40900 |
+
"grad_norm": NaN,
|
40901 |
+
"learning_rate": 7.660096663278938e-08,
|
40902 |
+
"loss": 0.0,
|
40903 |
+
"step": 5842
|
40904 |
+
},
|
40905 |
+
{
|
40906 |
+
"epoch": 0.9827600706416617,
|
40907 |
+
"grad_norm": NaN,
|
40908 |
+
"learning_rate": 7.512116035279237e-08,
|
40909 |
+
"loss": 0.0,
|
40910 |
+
"step": 5843
|
40911 |
+
},
|
40912 |
+
{
|
40913 |
+
"epoch": 0.982928265074426,
|
40914 |
+
"grad_norm": NaN,
|
40915 |
+
"learning_rate": 7.365577681110458e-08,
|
40916 |
+
"loss": 0.0,
|
40917 |
+
"step": 5844
|
40918 |
+
},
|
40919 |
+
{
|
40920 |
+
"epoch": 0.9830964595071903,
|
40921 |
+
"grad_norm": NaN,
|
40922 |
+
"learning_rate": 7.220481643105403e-08,
|
40923 |
+
"loss": 0.0,
|
40924 |
+
"step": 5845
|
40925 |
+
},
|
40926 |
+
{
|
40927 |
+
"epoch": 0.9832646539399545,
|
40928 |
+
"grad_norm": NaN,
|
40929 |
+
"learning_rate": 7.076827963181099e-08,
|
40930 |
+
"loss": 0.0,
|
40931 |
+
"step": 5846
|
40932 |
+
},
|
40933 |
+
{
|
40934 |
+
"epoch": 0.9834328483727188,
|
40935 |
+
"grad_norm": NaN,
|
40936 |
+
"learning_rate": 6.934616682837125e-08,
|
40937 |
+
"loss": 0.0,
|
40938 |
+
"step": 5847
|
40939 |
+
},
|
40940 |
+
{
|
40941 |
+
"epoch": 0.9836010428054831,
|
40942 |
+
"grad_norm": NaN,
|
40943 |
+
"learning_rate": 6.79384784315673e-08,
|
40944 |
+
"loss": 0.0,
|
40945 |
+
"step": 5848
|
40946 |
+
},
|
40947 |
+
{
|
40948 |
+
"epoch": 0.9837692372382474,
|
40949 |
+
"grad_norm": NaN,
|
40950 |
+
"learning_rate": 6.65452148480683e-08,
|
40951 |
+
"loss": 0.0,
|
40952 |
+
"step": 5849
|
40953 |
+
},
|
40954 |
+
{
|
40955 |
+
"epoch": 0.9839374316710117,
|
40956 |
+
"grad_norm": NaN,
|
40957 |
+
"learning_rate": 6.516637648036894e-08,
|
40958 |
+
"loss": 0.0,
|
40959 |
+
"step": 5850
|
40960 |
+
},
|
40961 |
+
{
|
40962 |
+
"epoch": 0.9841056261037759,
|
40963 |
+
"grad_norm": NaN,
|
40964 |
+
"learning_rate": 6.380196372680058e-08,
|
40965 |
+
"loss": 0.0,
|
40966 |
+
"step": 5851
|
40967 |
+
},
|
40968 |
+
{
|
40969 |
+
"epoch": 0.9842738205365402,
|
40970 |
+
"grad_norm": NaN,
|
40971 |
+
"learning_rate": 6.245197698152571e-08,
|
40972 |
+
"loss": 0.0,
|
40973 |
+
"step": 5852
|
40974 |
+
},
|
40975 |
+
{
|
40976 |
+
"epoch": 0.9844420149693045,
|
40977 |
+
"grad_norm": NaN,
|
40978 |
+
"learning_rate": 6.111641663454903e-08,
|
40979 |
+
"loss": 0.0,
|
40980 |
+
"step": 5853
|
40981 |
+
},
|
40982 |
+
{
|
40983 |
+
"epoch": 0.9846102094020688,
|
40984 |
+
"grad_norm": NaN,
|
40985 |
+
"learning_rate": 5.979528307168414e-08,
|
40986 |
+
"loss": 0.0,
|
40987 |
+
"step": 5854
|
40988 |
+
},
|
40989 |
+
{
|
40990 |
+
"epoch": 0.9847784038348331,
|
40991 |
+
"grad_norm": NaN,
|
40992 |
+
"learning_rate": 5.84885766746035e-08,
|
40993 |
+
"loss": 0.0,
|
40994 |
+
"step": 5855
|
40995 |
+
},
|
40996 |
+
{
|
40997 |
+
"epoch": 0.9849465982675973,
|
40998 |
+
"grad_norm": NaN,
|
40999 |
+
"learning_rate": 5.7196297820794054e-08,
|
41000 |
+
"loss": 0.0,
|
41001 |
+
"step": 5856
|
41002 |
+
},
|
41003 |
+
{
|
41004 |
+
"epoch": 0.9851147927003616,
|
41005 |
+
"grad_norm": NaN,
|
41006 |
+
"learning_rate": 5.591844688358494e-08,
|
41007 |
+
"loss": 0.0,
|
41008 |
+
"step": 5857
|
41009 |
+
},
|
41010 |
+
{
|
41011 |
+
"epoch": 0.9852829871331259,
|
41012 |
+
"grad_norm": NaN,
|
41013 |
+
"learning_rate": 5.465502423213087e-08,
|
41014 |
+
"loss": 0.0,
|
41015 |
+
"step": 5858
|
41016 |
+
},
|
41017 |
+
{
|
41018 |
+
"epoch": 0.9854511815658902,
|
41019 |
+
"grad_norm": NaN,
|
41020 |
+
"learning_rate": 5.340603023141766e-08,
|
41021 |
+
"loss": 0.0,
|
41022 |
+
"step": 5859
|
41023 |
+
},
|
41024 |
+
{
|
41025 |
+
"epoch": 0.9856193759986545,
|
41026 |
+
"grad_norm": NaN,
|
41027 |
+
"learning_rate": 5.217146524226779e-08,
|
41028 |
+
"loss": 0.0,
|
41029 |
+
"step": 5860
|
41030 |
+
},
|
41031 |
+
{
|
41032 |
+
"epoch": 0.9857875704314187,
|
41033 |
+
"grad_norm": NaN,
|
41034 |
+
"learning_rate": 5.0951329621340416e-08,
|
41035 |
+
"loss": 0.0,
|
41036 |
+
"step": 5861
|
41037 |
+
},
|
41038 |
+
{
|
41039 |
+
"epoch": 0.985955764864183,
|
41040 |
+
"grad_norm": NaN,
|
41041 |
+
"learning_rate": 4.9745623721109135e-08,
|
41042 |
+
"loss": 0.0,
|
41043 |
+
"step": 5862
|
41044 |
+
},
|
41045 |
+
{
|
41046 |
+
"epoch": 0.9861239592969473,
|
41047 |
+
"grad_norm": NaN,
|
41048 |
+
"learning_rate": 4.855434788988977e-08,
|
41049 |
+
"loss": 0.0,
|
41050 |
+
"step": 5863
|
41051 |
+
},
|
41052 |
+
{
|
41053 |
+
"epoch": 0.9862921537297116,
|
41054 |
+
"grad_norm": NaN,
|
41055 |
+
"learning_rate": 4.737750247183481e-08,
|
41056 |
+
"loss": 0.0,
|
41057 |
+
"step": 5864
|
41058 |
+
},
|
41059 |
+
{
|
41060 |
+
"epoch": 0.9864603481624759,
|
41061 |
+
"grad_norm": NaN,
|
41062 |
+
"learning_rate": 4.621508780691119e-08,
|
41063 |
+
"loss": 0.0,
|
41064 |
+
"step": 5865
|
41065 |
+
},
|
41066 |
+
{
|
41067 |
+
"epoch": 0.9866285425952401,
|
41068 |
+
"grad_norm": NaN,
|
41069 |
+
"learning_rate": 4.506710423093918e-08,
|
41070 |
+
"loss": 0.0,
|
41071 |
+
"step": 5866
|
41072 |
+
},
|
41073 |
+
{
|
41074 |
+
"epoch": 0.9867967370280044,
|
41075 |
+
"grad_norm": NaN,
|
41076 |
+
"learning_rate": 4.39335520755535e-08,
|
41077 |
+
"loss": 0.0,
|
41078 |
+
"step": 5867
|
41079 |
+
},
|
41080 |
+
{
|
41081 |
+
"epoch": 0.9869649314607687,
|
41082 |
+
"grad_norm": NaN,
|
41083 |
+
"learning_rate": 4.281443166822552e-08,
|
41084 |
+
"loss": 0.0,
|
41085 |
+
"step": 5868
|
41086 |
+
},
|
41087 |
+
{
|
41088 |
+
"epoch": 0.987133125893533,
|
41089 |
+
"grad_norm": NaN,
|
41090 |
+
"learning_rate": 4.1709743332252196e-08,
|
41091 |
+
"loss": 0.0,
|
41092 |
+
"step": 5869
|
41093 |
+
},
|
41094 |
+
{
|
41095 |
+
"epoch": 0.9873013203262972,
|
41096 |
+
"grad_norm": NaN,
|
41097 |
+
"learning_rate": 4.061948738677268e-08,
|
41098 |
+
"loss": 0.0,
|
41099 |
+
"step": 5870
|
41100 |
+
},
|
41101 |
+
{
|
41102 |
+
"epoch": 0.9874695147590615,
|
41103 |
+
"grad_norm": NaN,
|
41104 |
+
"learning_rate": 3.9543664146746154e-08,
|
41105 |
+
"loss": 0.0,
|
41106 |
+
"step": 5871
|
41107 |
+
},
|
41108 |
+
{
|
41109 |
+
"epoch": 0.9876377091918257,
|
41110 |
+
"grad_norm": NaN,
|
41111 |
+
"learning_rate": 3.8482273922962884e-08,
|
41112 |
+
"loss": 0.0,
|
41113 |
+
"step": 5872
|
41114 |
+
},
|
41115 |
+
{
|
41116 |
+
"epoch": 0.98780590362459,
|
41117 |
+
"grad_norm": NaN,
|
41118 |
+
"learning_rate": 3.743531702204983e-08,
|
41119 |
+
"loss": 0.0,
|
41120 |
+
"step": 5873
|
41121 |
+
},
|
41122 |
+
{
|
41123 |
+
"epoch": 0.9879740980573543,
|
41124 |
+
"grad_norm": NaN,
|
41125 |
+
"learning_rate": 3.6402793746465045e-08,
|
41126 |
+
"loss": 0.0,
|
41127 |
+
"step": 5874
|
41128 |
+
},
|
41129 |
+
{
|
41130 |
+
"epoch": 0.9881422924901185,
|
41131 |
+
"grad_norm": NaN,
|
41132 |
+
"learning_rate": 3.538470439448105e-08,
|
41133 |
+
"loss": 0.0,
|
41134 |
+
"step": 5875
|
41135 |
+
},
|
41136 |
+
{
|
41137 |
+
"epoch": 0.9883104869228828,
|
41138 |
+
"grad_norm": NaN,
|
41139 |
+
"learning_rate": 3.438104926022923e-08,
|
41140 |
+
"loss": 0.0,
|
41141 |
+
"step": 5876
|
41142 |
+
},
|
41143 |
+
{
|
41144 |
+
"epoch": 0.9884786813556471,
|
41145 |
+
"grad_norm": NaN,
|
41146 |
+
"learning_rate": 3.339182863363877e-08,
|
41147 |
+
"loss": 0.0,
|
41148 |
+
"step": 5877
|
41149 |
+
},
|
41150 |
+
{
|
41151 |
+
"epoch": 0.9886468757884114,
|
41152 |
+
"grad_norm": NaN,
|
41153 |
+
"learning_rate": 3.241704280049218e-08,
|
41154 |
+
"loss": 0.0,
|
41155 |
+
"step": 5878
|
41156 |
+
},
|
41157 |
+
{
|
41158 |
+
"epoch": 0.9888150702211757,
|
41159 |
+
"grad_norm": NaN,
|
41160 |
+
"learning_rate": 3.145669204239754e-08,
|
41161 |
+
"loss": 0.0,
|
41162 |
+
"step": 5879
|
41163 |
+
},
|
41164 |
+
{
|
41165 |
+
"epoch": 0.9889832646539399,
|
41166 |
+
"grad_norm": NaN,
|
41167 |
+
"learning_rate": 3.051077663677737e-08,
|
41168 |
+
"loss": 0.0,
|
41169 |
+
"step": 5880
|
41170 |
+
},
|
41171 |
+
{
|
41172 |
+
"epoch": 0.9891514590867042,
|
41173 |
+
"grad_norm": NaN,
|
41174 |
+
"learning_rate": 2.9579296856907523e-08,
|
41175 |
+
"loss": 0.0,
|
41176 |
+
"step": 5881
|
41177 |
+
},
|
41178 |
+
{
|
41179 |
+
"epoch": 0.9893196535194685,
|
41180 |
+
"grad_norm": NaN,
|
41181 |
+
"learning_rate": 2.86622529718783e-08,
|
41182 |
+
"loss": 0.0,
|
41183 |
+
"step": 5882
|
41184 |
+
},
|
41185 |
+
{
|
41186 |
+
"epoch": 0.9894878479522328,
|
41187 |
+
"grad_norm": NaN,
|
41188 |
+
"learning_rate": 2.775964524661667e-08,
|
41189 |
+
"loss": 0.0,
|
41190 |
+
"step": 5883
|
41191 |
+
},
|
41192 |
+
{
|
41193 |
+
"epoch": 0.989656042384997,
|
41194 |
+
"grad_norm": NaN,
|
41195 |
+
"learning_rate": 2.6871473941864067e-08,
|
41196 |
+
"loss": 0.0,
|
41197 |
+
"step": 5884
|
41198 |
+
},
|
41199 |
+
{
|
41200 |
+
"epoch": 0.9898242368177613,
|
41201 |
+
"grad_norm": NaN,
|
41202 |
+
"learning_rate": 2.599773931422078e-08,
|
41203 |
+
"loss": 0.0,
|
41204 |
+
"step": 5885
|
41205 |
+
},
|
41206 |
+
{
|
41207 |
+
"epoch": 0.9899924312505256,
|
41208 |
+
"grad_norm": NaN,
|
41209 |
+
"learning_rate": 2.5138441616079367e-08,
|
41210 |
+
"loss": 0.0,
|
41211 |
+
"step": 5886
|
41212 |
+
},
|
41213 |
+
{
|
41214 |
+
"epoch": 0.9901606256832899,
|
41215 |
+
"grad_norm": NaN,
|
41216 |
+
"learning_rate": 2.4293581095696794e-08,
|
41217 |
+
"loss": 0.0,
|
41218 |
+
"step": 5887
|
41219 |
+
},
|
41220 |
+
{
|
41221 |
+
"epoch": 0.9903288201160542,
|
41222 |
+
"grad_norm": NaN,
|
41223 |
+
"learning_rate": 2.346315799713894e-08,
|
41224 |
+
"loss": 0.0,
|
41225 |
+
"step": 5888
|
41226 |
+
},
|
41227 |
+
{
|
41228 |
+
"epoch": 0.9904970145488184,
|
41229 |
+
"grad_norm": NaN,
|
41230 |
+
"learning_rate": 2.264717256030835e-08,
|
41231 |
+
"loss": 0.0,
|
41232 |
+
"step": 5889
|
41233 |
+
},
|
41234 |
+
{
|
41235 |
+
"epoch": 0.9906652089815827,
|
41236 |
+
"grad_norm": NaN,
|
41237 |
+
"learning_rate": 2.1845625020927572e-08,
|
41238 |
+
"loss": 0.0,
|
41239 |
+
"step": 5890
|
41240 |
+
},
|
41241 |
+
{
|
41242 |
+
"epoch": 0.990833403414347,
|
41243 |
+
"grad_norm": NaN,
|
41244 |
+
"learning_rate": 2.105851561056138e-08,
|
41245 |
+
"loss": 0.0,
|
41246 |
+
"step": 5891
|
41247 |
+
},
|
41248 |
+
{
|
41249 |
+
"epoch": 0.9910015978471113,
|
41250 |
+
"grad_norm": NaN,
|
41251 |
+
"learning_rate": 2.0285844556588996e-08,
|
41252 |
+
"loss": 0.0,
|
41253 |
+
"step": 5892
|
41254 |
+
},
|
41255 |
+
{
|
41256 |
+
"epoch": 0.9911697922798756,
|
41257 |
+
"grad_norm": NaN,
|
41258 |
+
"learning_rate": 1.952761208223186e-08,
|
41259 |
+
"loss": 0.0,
|
41260 |
+
"step": 5893
|
41261 |
+
},
|
41262 |
+
{
|
41263 |
+
"epoch": 0.9913379867126398,
|
41264 |
+
"grad_norm": NaN,
|
41265 |
+
"learning_rate": 1.878381840653698e-08,
|
41266 |
+
"loss": 0.0,
|
41267 |
+
"step": 5894
|
41268 |
+
},
|
41269 |
+
{
|
41270 |
+
"epoch": 0.9915061811454041,
|
41271 |
+
"grad_norm": NaN,
|
41272 |
+
"learning_rate": 1.8054463744376914e-08,
|
41273 |
+
"loss": 0.0,
|
41274 |
+
"step": 5895
|
41275 |
+
},
|
41276 |
+
{
|
41277 |
+
"epoch": 0.9916743755781684,
|
41278 |
+
"grad_norm": NaN,
|
41279 |
+
"learning_rate": 1.7339548306449794e-08,
|
41280 |
+
"loss": 0.0,
|
41281 |
+
"step": 5896
|
41282 |
+
},
|
41283 |
+
{
|
41284 |
+
"epoch": 0.9918425700109327,
|
41285 |
+
"grad_norm": NaN,
|
41286 |
+
"learning_rate": 1.6639072299284852e-08,
|
41287 |
+
"loss": 0.0,
|
41288 |
+
"step": 5897
|
41289 |
+
},
|
41290 |
+
{
|
41291 |
+
"epoch": 0.992010764443697,
|
41292 |
+
"grad_norm": NaN,
|
41293 |
+
"learning_rate": 1.5953035925253547e-08,
|
41294 |
+
"loss": 0.0,
|
41295 |
+
"step": 5898
|
41296 |
+
},
|
41297 |
+
{
|
41298 |
+
"epoch": 0.9921789588764612,
|
41299 |
+
"grad_norm": NaN,
|
41300 |
+
"learning_rate": 1.528143938253068e-08,
|
41301 |
+
"loss": 0.0,
|
41302 |
+
"step": 5899
|
41303 |
+
},
|
41304 |
+
{
|
41305 |
+
"epoch": 0.9923471533092255,
|
41306 |
+
"grad_norm": NaN,
|
41307 |
+
"learning_rate": 1.4624282865144389e-08,
|
41308 |
+
"loss": 0.0,
|
41309 |
+
"step": 5900
|
41310 |
+
},
|
41311 |
+
{
|
41312 |
+
"epoch": 0.9925153477419898,
|
41313 |
+
"grad_norm": NaN,
|
41314 |
+
"learning_rate": 1.3981566562931702e-08,
|
41315 |
+
"loss": 0.0,
|
41316 |
+
"step": 5901
|
41317 |
+
},
|
41318 |
+
{
|
41319 |
+
"epoch": 0.992683542174754,
|
41320 |
+
"grad_norm": NaN,
|
41321 |
+
"learning_rate": 1.3353290661571871e-08,
|
41322 |
+
"loss": 0.0,
|
41323 |
+
"step": 5902
|
41324 |
+
},
|
41325 |
+
{
|
41326 |
+
"epoch": 0.9928517366075182,
|
41327 |
+
"grad_norm": NaN,
|
41328 |
+
"learning_rate": 1.2739455342558603e-08,
|
41329 |
+
"loss": 0.0,
|
41330 |
+
"step": 5903
|
41331 |
+
},
|
41332 |
+
{
|
41333 |
+
"epoch": 0.9930199310402825,
|
41334 |
+
"grad_norm": NaN,
|
41335 |
+
"learning_rate": 1.2140060783227824e-08,
|
41336 |
+
"loss": 0.0,
|
41337 |
+
"step": 5904
|
41338 |
+
},
|
41339 |
+
{
|
41340 |
+
"epoch": 0.9931881254730468,
|
41341 |
+
"grad_norm": NaN,
|
41342 |
+
"learning_rate": 1.155510715674102e-08,
|
41343 |
+
"loss": 0.0,
|
41344 |
+
"step": 5905
|
41345 |
+
},
|
41346 |
+
{
|
41347 |
+
"epoch": 0.9933563199058111,
|
41348 |
+
"grad_norm": NaN,
|
41349 |
+
"learning_rate": 1.098459463207968e-08,
|
41350 |
+
"loss": 0.0,
|
41351 |
+
"step": 5906
|
41352 |
+
},
|
41353 |
+
{
|
41354 |
+
"epoch": 0.9935245143385754,
|
41355 |
+
"grad_norm": NaN,
|
41356 |
+
"learning_rate": 1.042852337406197e-08,
|
41357 |
+
"loss": 0.0,
|
41358 |
+
"step": 5907
|
41359 |
+
},
|
41360 |
+
{
|
41361 |
+
"epoch": 0.9936927087713396,
|
41362 |
+
"grad_norm": NaN,
|
41363 |
+
"learning_rate": 9.88689354332606e-09,
|
41364 |
+
"loss": 0.0,
|
41365 |
+
"step": 5908
|
41366 |
+
},
|
41367 |
+
{
|
41368 |
+
"epoch": 0.9938609032041039,
|
41369 |
+
"grad_norm": NaN,
|
41370 |
+
"learning_rate": 9.359705296346776e-09,
|
41371 |
+
"loss": 0.0,
|
41372 |
+
"step": 5909
|
41373 |
+
},
|
41374 |
+
{
|
41375 |
+
"epoch": 0.9940290976368682,
|
41376 |
+
"grad_norm": NaN,
|
41377 |
+
"learning_rate": 8.846958785418968e-09,
|
41378 |
+
"loss": 0.0,
|
41379 |
+
"step": 5910
|
41380 |
+
},
|
41381 |
+
{
|
41382 |
+
"epoch": 0.9941972920696325,
|
41383 |
+
"grad_norm": NaN,
|
41384 |
+
"learning_rate": 8.34865415867414e-09,
|
41385 |
+
"loss": 0.0,
|
41386 |
+
"step": 5911
|
41387 |
+
},
|
41388 |
+
{
|
41389 |
+
"epoch": 0.9943654865023968,
|
41390 |
+
"grad_norm": NaN,
|
41391 |
+
"learning_rate": 7.86479156006381e-09,
|
41392 |
+
"loss": 0.0,
|
41393 |
+
"step": 5912
|
41394 |
+
},
|
41395 |
+
{
|
41396 |
+
"epoch": 0.994533680935161,
|
41397 |
+
"grad_norm": NaN,
|
41398 |
+
"learning_rate": 7.3953711293706096e-09,
|
41399 |
+
"loss": 0.0,
|
41400 |
+
"step": 5913
|
41401 |
+
},
|
41402 |
+
{
|
41403 |
+
"epoch": 0.9947018753679253,
|
41404 |
+
"grad_norm": NaN,
|
41405 |
+
"learning_rate": 6.940393002202727e-09,
|
41406 |
+
"loss": 0.0,
|
41407 |
+
"step": 5914
|
41408 |
+
},
|
41409 |
+
{
|
41410 |
+
"epoch": 0.9948700698006896,
|
41411 |
+
"grad_norm": NaN,
|
41412 |
+
"learning_rate": 6.4998573100050195e-09,
|
41413 |
+
"loss": 0.0,
|
41414 |
+
"step": 5915
|
41415 |
+
},
|
41416 |
+
{
|
41417 |
+
"epoch": 0.9950382642334539,
|
41418 |
+
"grad_norm": NaN,
|
41419 |
+
"learning_rate": 6.0737641800368e-09,
|
41420 |
+
"loss": 0.0,
|
41421 |
+
"step": 5916
|
41422 |
+
},
|
41423 |
+
{
|
41424 |
+
"epoch": 0.9952064586662182,
|
41425 |
+
"grad_norm": NaN,
|
41426 |
+
"learning_rate": 5.662113735394048e-09,
|
41427 |
+
"loss": 0.0,
|
41428 |
+
"step": 5917
|
41429 |
+
},
|
41430 |
+
{
|
41431 |
+
"epoch": 0.9953746530989824,
|
41432 |
+
"grad_norm": NaN,
|
41433 |
+
"learning_rate": 5.264906095003852e-09,
|
41434 |
+
"loss": 0.0,
|
41435 |
+
"step": 5918
|
41436 |
+
},
|
41437 |
+
{
|
41438 |
+
"epoch": 0.9955428475317467,
|
41439 |
+
"grad_norm": NaN,
|
41440 |
+
"learning_rate": 4.8821413736022115e-09,
|
41441 |
+
"loss": 0.0,
|
41442 |
+
"step": 5919
|
41443 |
+
},
|
41444 |
+
{
|
41445 |
+
"epoch": 0.995711041964511,
|
41446 |
+
"grad_norm": NaN,
|
41447 |
+
"learning_rate": 4.51381968177289e-09,
|
41448 |
+
"loss": 0.0,
|
41449 |
+
"step": 5920
|
41450 |
+
},
|
41451 |
+
{
|
41452 |
+
"epoch": 0.9958792363972753,
|
41453 |
+
"grad_norm": NaN,
|
41454 |
+
"learning_rate": 4.159941125925215e-09,
|
41455 |
+
"loss": 0.0,
|
41456 |
+
"step": 5921
|
41457 |
+
},
|
41458 |
+
{
|
41459 |
+
"epoch": 0.9960474308300395,
|
41460 |
+
"grad_norm": NaN,
|
41461 |
+
"learning_rate": 3.820505808277419e-09,
|
41462 |
+
"loss": 0.0,
|
41463 |
+
"step": 5922
|
41464 |
+
},
|
41465 |
+
{
|
41466 |
+
"epoch": 0.9962156252628038,
|
41467 |
+
"grad_norm": NaN,
|
41468 |
+
"learning_rate": 3.4955138269010534e-09,
|
41469 |
+
"loss": 0.0,
|
41470 |
+
"step": 5923
|
41471 |
+
},
|
41472 |
+
{
|
41473 |
+
"epoch": 0.9963838196955681,
|
41474 |
+
"grad_norm": NaN,
|
41475 |
+
"learning_rate": 3.184965275676577e-09,
|
41476 |
+
"loss": 0.0,
|
41477 |
+
"step": 5924
|
41478 |
+
},
|
41479 |
+
{
|
41480 |
+
"epoch": 0.9965520141283324,
|
41481 |
+
"grad_norm": NaN,
|
41482 |
+
"learning_rate": 2.8888602443211122e-09,
|
41483 |
+
"loss": 0.0,
|
41484 |
+
"step": 5925
|
41485 |
+
},
|
41486 |
+
{
|
41487 |
+
"epoch": 0.9967202085610967,
|
41488 |
+
"grad_norm": NaN,
|
41489 |
+
"learning_rate": 2.607198818371792e-09,
|
41490 |
+
"loss": 0.0,
|
41491 |
+
"step": 5926
|
41492 |
+
},
|
41493 |
+
{
|
41494 |
+
"epoch": 0.996888402993861,
|
41495 |
+
"grad_norm": NaN,
|
41496 |
+
"learning_rate": 2.3399810792024133e-09,
|
41497 |
+
"loss": 0.0,
|
41498 |
+
"step": 5927
|
41499 |
+
},
|
41500 |
+
{
|
41501 |
+
"epoch": 0.9970565974266252,
|
41502 |
+
"grad_norm": NaN,
|
41503 |
+
"learning_rate": 2.087207104001232e-09,
|
41504 |
+
"loss": 0.0,
|
41505 |
+
"step": 5928
|
41506 |
+
},
|
41507 |
+
{
|
41508 |
+
"epoch": 0.9972247918593895,
|
41509 |
+
"grad_norm": NaN,
|
41510 |
+
"learning_rate": 1.8488769658042693e-09,
|
41511 |
+
"loss": 0.0,
|
41512 |
+
"step": 5929
|
41513 |
+
},
|
41514 |
+
{
|
41515 |
+
"epoch": 0.9973929862921538,
|
41516 |
+
"grad_norm": NaN,
|
41517 |
+
"learning_rate": 1.624990733450904e-09,
|
41518 |
+
"loss": 0.0,
|
41519 |
+
"step": 5930
|
41520 |
+
},
|
41521 |
+
{
|
41522 |
+
"epoch": 0.997561180724918,
|
41523 |
+
"grad_norm": NaN,
|
41524 |
+
"learning_rate": 1.4155484716227296e-09,
|
41525 |
+
"loss": 0.0,
|
41526 |
+
"step": 5931
|
41527 |
+
},
|
41528 |
+
{
|
41529 |
+
"epoch": 0.9977293751576822,
|
41530 |
+
"grad_norm": NaN,
|
41531 |
+
"learning_rate": 1.220550240826901e-09,
|
41532 |
+
"loss": 0.0,
|
41533 |
+
"step": 5932
|
41534 |
+
},
|
41535 |
+
{
|
41536 |
+
"epoch": 0.9978975695904465,
|
41537 |
+
"grad_norm": NaN,
|
41538 |
+
"learning_rate": 1.0399960974016854e-09,
|
41539 |
+
"loss": 0.0,
|
41540 |
+
"step": 5933
|
41541 |
+
},
|
41542 |
+
{
|
41543 |
+
"epoch": 0.9980657640232108,
|
41544 |
+
"grad_norm": NaN,
|
41545 |
+
"learning_rate": 8.738860934942584e-10,
|
41546 |
+
"loss": 0.0,
|
41547 |
+
"step": 5934
|
41548 |
+
},
|
41549 |
+
{
|
41550 |
+
"epoch": 0.9982339584559751,
|
41551 |
+
"grad_norm": NaN,
|
41552 |
+
"learning_rate": 7.222202770995612e-10,
|
41553 |
+
"loss": 0.0,
|
41554 |
+
"step": 5935
|
41555 |
+
},
|
41556 |
+
{
|
41557 |
+
"epoch": 0.9984021528887393,
|
41558 |
+
"grad_norm": NaN,
|
41559 |
+
"learning_rate": 5.849986920325456e-10,
|
41560 |
+
"loss": 0.0,
|
41561 |
+
"step": 5936
|
41562 |
+
},
|
41563 |
+
{
|
41564 |
+
"epoch": 0.9985703473215036,
|
41565 |
+
"grad_norm": NaN,
|
41566 |
+
"learning_rate": 4.622213779392759e-10,
|
41567 |
+
"loss": 0.0,
|
41568 |
+
"step": 5937
|
41569 |
+
},
|
41570 |
+
{
|
41571 |
+
"epoch": 0.9987385417542679,
|
41572 |
+
"grad_norm": NaN,
|
41573 |
+
"learning_rate": 3.538883702747242e-10,
|
41574 |
+
"loss": 0.0,
|
41575 |
+
"step": 5938
|
41576 |
+
},
|
41577 |
+
{
|
41578 |
+
"epoch": 0.9989067361870322,
|
41579 |
+
"grad_norm": NaN,
|
41580 |
+
"learning_rate": 2.599997003471799e-10,
|
41581 |
+
"loss": 0.0,
|
41582 |
+
"step": 5939
|
41583 |
+
},
|
41584 |
+
{
|
41585 |
+
"epoch": 0.9990749306197965,
|
41586 |
+
"grad_norm": NaN,
|
41587 |
+
"learning_rate": 1.8055539527939148e-10,
|
41588 |
+
"loss": 0.0,
|
41589 |
+
"step": 5940
|
41590 |
+
},
|
41591 |
+
{
|
41592 |
+
"epoch": 0.9992431250525607,
|
41593 |
+
"grad_norm": NaN,
|
41594 |
+
"learning_rate": 1.155554780141177e-10,
|
41595 |
+
"loss": 0.0,
|
41596 |
+
"step": 5941
|
41597 |
+
},
|
41598 |
+
{
|
41599 |
+
"epoch": 0.999411319485325,
|
41600 |
+
"grad_norm": NaN,
|
41601 |
+
"learning_rate": 6.499996733633218e-11,
|
41602 |
+
"loss": 0.0,
|
41603 |
+
"step": 5942
|
41604 |
+
},
|
41605 |
+
{
|
41606 |
+
"epoch": 0.9995795139180893,
|
41607 |
+
"grad_norm": NaN,
|
41608 |
+
"learning_rate": 2.8888877851018792e-11,
|
41609 |
+
"loss": 0.0,
|
41610 |
+
"step": 5943
|
41611 |
+
},
|
41612 |
+
{
|
41613 |
+
"epoch": 0.9997477083508536,
|
41614 |
+
"grad_norm": NaN,
|
41615 |
+
"learning_rate": 7.2222199831717406e-12,
|
41616 |
+
"loss": 0.0,
|
41617 |
+
"step": 5944
|
41618 |
+
},
|
41619 |
+
{
|
41620 |
+
"epoch": 0.9999159027836179,
|
41621 |
+
"grad_norm": NaN,
|
41622 |
+
"learning_rate": 0.0,
|
41623 |
+
"loss": 0.0,
|
41624 |
+
"step": 5945
|
41625 |
}
|
41626 |
],
|
41627 |
"logging_steps": 1,
|
|
|
41636 |
"should_evaluate": false,
|
41637 |
"should_log": false,
|
41638 |
"should_save": true,
|
41639 |
+
"should_training_stop": true
|
41640 |
},
|
41641 |
"attributes": {}
|
41642 |
}
|
41643 |
},
|
41644 |
+
"total_flos": 1.1191100899590144e+17,
|
41645 |
"train_batch_size": 8,
|
41646 |
"trial_name": null,
|
41647 |
"trial_params": null
|