Training in progress, step 5497, checkpoint
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/rng_state.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +1676 -3
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 516802328
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9e6d99ca62d5432c7f1fa682d654ad9d6b9f6d7c03adef75d8f5c0e896145b24
|
3 |
size 516802328
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4f99e22a7c9397e72a0b3305c8172ce6b91cecc53fdd4cbd0ce384bc0a90e2c7
|
3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e370ece037cd5a900281545b66ba85e14b0eaca5074bec4b2d33a4d7f503d5d3
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 500,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -36813,6 +36813,1679 @@
|
|
36813 |
"learning_rate": 3.3701096528724494e-06,
|
36814 |
"loss": 0.0,
|
36815 |
"step": 5258
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
36816 |
}
|
36817 |
],
|
36818 |
"logging_steps": 1,
|
@@ -36832,7 +38505,7 @@
|
|
36832 |
"attributes": {}
|
36833 |
}
|
36834 |
},
|
36835 |
-
"total_flos":
|
36836 |
"train_batch_size": 8,
|
36837 |
"trial_name": null,
|
36838 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.9245647969052224,
|
5 |
"eval_steps": 500,
|
6 |
+
"global_step": 5497,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
36813 |
"learning_rate": 3.3701096528724494e-06,
|
36814 |
"loss": 0.0,
|
36815 |
"step": 5258
|
36816 |
+
},
|
36817 |
+
{
|
36818 |
+
"epoch": 0.8845345219073248,
|
36819 |
+
"grad_norm": NaN,
|
36820 |
+
"learning_rate": 3.3604170279271374e-06,
|
36821 |
+
"loss": 0.0,
|
36822 |
+
"step": 5259
|
36823 |
+
},
|
36824 |
+
{
|
36825 |
+
"epoch": 0.8847027163400891,
|
36826 |
+
"grad_norm": NaN,
|
36827 |
+
"learning_rate": 3.350737876634957e-06,
|
36828 |
+
"loss": 0.0,
|
36829 |
+
"step": 5260
|
36830 |
+
},
|
36831 |
+
{
|
36832 |
+
"epoch": 0.8848709107728534,
|
36833 |
+
"grad_norm": NaN,
|
36834 |
+
"learning_rate": 3.3410722017920947e-06,
|
36835 |
+
"loss": 0.0,
|
36836 |
+
"step": 5261
|
36837 |
+
},
|
36838 |
+
{
|
36839 |
+
"epoch": 0.8850391052056177,
|
36840 |
+
"grad_norm": NaN,
|
36841 |
+
"learning_rate": 3.331420006190866e-06,
|
36842 |
+
"loss": 0.0,
|
36843 |
+
"step": 5262
|
36844 |
+
},
|
36845 |
+
{
|
36846 |
+
"epoch": 0.885207299638382,
|
36847 |
+
"grad_norm": NaN,
|
36848 |
+
"learning_rate": 3.32178129261968e-06,
|
36849 |
+
"loss": 0.0,
|
36850 |
+
"step": 5263
|
36851 |
+
},
|
36852 |
+
{
|
36853 |
+
"epoch": 0.8853754940711462,
|
36854 |
+
"grad_norm": NaN,
|
36855 |
+
"learning_rate": 3.3121560638630535e-06,
|
36856 |
+
"loss": 0.0,
|
36857 |
+
"step": 5264
|
36858 |
+
},
|
36859 |
+
{
|
36860 |
+
"epoch": 0.8855436885039105,
|
36861 |
+
"grad_norm": NaN,
|
36862 |
+
"learning_rate": 3.302544322701606e-06,
|
36863 |
+
"loss": 0.0,
|
36864 |
+
"step": 5265
|
36865 |
+
},
|
36866 |
+
{
|
36867 |
+
"epoch": 0.8857118829366748,
|
36868 |
+
"grad_norm": NaN,
|
36869 |
+
"learning_rate": 3.292946071912051e-06,
|
36870 |
+
"loss": 0.0,
|
36871 |
+
"step": 5266
|
36872 |
+
},
|
36873 |
+
{
|
36874 |
+
"epoch": 0.8858800773694391,
|
36875 |
+
"grad_norm": NaN,
|
36876 |
+
"learning_rate": 3.2833613142672358e-06,
|
36877 |
+
"loss": 0.0,
|
36878 |
+
"step": 5267
|
36879 |
+
},
|
36880 |
+
{
|
36881 |
+
"epoch": 0.8860482718022034,
|
36882 |
+
"grad_norm": NaN,
|
36883 |
+
"learning_rate": 3.2737900525360853e-06,
|
36884 |
+
"loss": 0.0,
|
36885 |
+
"step": 5268
|
36886 |
+
},
|
36887 |
+
{
|
36888 |
+
"epoch": 0.8862164662349676,
|
36889 |
+
"grad_norm": NaN,
|
36890 |
+
"learning_rate": 3.264232289483621e-06,
|
36891 |
+
"loss": 0.0,
|
36892 |
+
"step": 5269
|
36893 |
+
},
|
36894 |
+
{
|
36895 |
+
"epoch": 0.8863846606677319,
|
36896 |
+
"grad_norm": NaN,
|
36897 |
+
"learning_rate": 3.2546880278709725e-06,
|
36898 |
+
"loss": 0.0,
|
36899 |
+
"step": 5270
|
36900 |
+
},
|
36901 |
+
{
|
36902 |
+
"epoch": 0.8865528551004962,
|
36903 |
+
"grad_norm": NaN,
|
36904 |
+
"learning_rate": 3.245157270455379e-06,
|
36905 |
+
"loss": 0.0,
|
36906 |
+
"step": 5271
|
36907 |
+
},
|
36908 |
+
{
|
36909 |
+
"epoch": 0.8867210495332605,
|
36910 |
+
"grad_norm": NaN,
|
36911 |
+
"learning_rate": 3.235640019990166e-06,
|
36912 |
+
"loss": 0.0,
|
36913 |
+
"step": 5272
|
36914 |
+
},
|
36915 |
+
{
|
36916 |
+
"epoch": 0.8868892439660248,
|
36917 |
+
"grad_norm": NaN,
|
36918 |
+
"learning_rate": 3.226136279224762e-06,
|
36919 |
+
"loss": 0.0,
|
36920 |
+
"step": 5273
|
36921 |
+
},
|
36922 |
+
{
|
36923 |
+
"epoch": 0.887057438398789,
|
36924 |
+
"grad_norm": NaN,
|
36925 |
+
"learning_rate": 3.2166460509046814e-06,
|
36926 |
+
"loss": 0.0,
|
36927 |
+
"step": 5274
|
36928 |
+
},
|
36929 |
+
{
|
36930 |
+
"epoch": 0.8872256328315533,
|
36931 |
+
"grad_norm": NaN,
|
36932 |
+
"learning_rate": 3.207169337771565e-06,
|
36933 |
+
"loss": 0.0,
|
36934 |
+
"step": 5275
|
36935 |
+
},
|
36936 |
+
{
|
36937 |
+
"epoch": 0.8873938272643176,
|
36938 |
+
"grad_norm": NaN,
|
36939 |
+
"learning_rate": 3.1977061425631117e-06,
|
36940 |
+
"loss": 0.0,
|
36941 |
+
"step": 5276
|
36942 |
+
},
|
36943 |
+
{
|
36944 |
+
"epoch": 0.8875620216970819,
|
36945 |
+
"grad_norm": NaN,
|
36946 |
+
"learning_rate": 3.18825646801314e-06,
|
36947 |
+
"loss": 0.0,
|
36948 |
+
"step": 5277
|
36949 |
+
},
|
36950 |
+
{
|
36951 |
+
"epoch": 0.8877302161298462,
|
36952 |
+
"grad_norm": NaN,
|
36953 |
+
"learning_rate": 3.1788203168515497e-06,
|
36954 |
+
"loss": 0.0,
|
36955 |
+
"step": 5278
|
36956 |
+
},
|
36957 |
+
{
|
36958 |
+
"epoch": 0.8878984105626104,
|
36959 |
+
"grad_norm": NaN,
|
36960 |
+
"learning_rate": 3.169397691804343e-06,
|
36961 |
+
"loss": 0.0,
|
36962 |
+
"step": 5279
|
36963 |
+
},
|
36964 |
+
{
|
36965 |
+
"epoch": 0.8880666049953746,
|
36966 |
+
"grad_norm": NaN,
|
36967 |
+
"learning_rate": 3.159988595593616e-06,
|
36968 |
+
"loss": 0.0,
|
36969 |
+
"step": 5280
|
36970 |
+
},
|
36971 |
+
{
|
36972 |
+
"epoch": 0.8882347994281389,
|
36973 |
+
"grad_norm": NaN,
|
36974 |
+
"learning_rate": 3.150593030937543e-06,
|
36975 |
+
"loss": 0.0,
|
36976 |
+
"step": 5281
|
36977 |
+
},
|
36978 |
+
{
|
36979 |
+
"epoch": 0.8884029938609032,
|
36980 |
+
"grad_norm": NaN,
|
36981 |
+
"learning_rate": 3.141211000550398e-06,
|
36982 |
+
"loss": 0.0,
|
36983 |
+
"step": 5282
|
36984 |
+
},
|
36985 |
+
{
|
36986 |
+
"epoch": 0.8885711882936674,
|
36987 |
+
"grad_norm": NaN,
|
36988 |
+
"learning_rate": 3.1318425071425463e-06,
|
36989 |
+
"loss": 0.0,
|
36990 |
+
"step": 5283
|
36991 |
+
},
|
36992 |
+
{
|
36993 |
+
"epoch": 0.8887393827264317,
|
36994 |
+
"grad_norm": NaN,
|
36995 |
+
"learning_rate": 3.1224875534204445e-06,
|
36996 |
+
"loss": 0.0,
|
36997 |
+
"step": 5284
|
36998 |
+
},
|
36999 |
+
{
|
37000 |
+
"epoch": 0.888907577159196,
|
37001 |
+
"grad_norm": NaN,
|
37002 |
+
"learning_rate": 3.1131461420866313e-06,
|
37003 |
+
"loss": 0.0,
|
37004 |
+
"step": 5285
|
37005 |
+
},
|
37006 |
+
{
|
37007 |
+
"epoch": 0.8890757715919603,
|
37008 |
+
"grad_norm": NaN,
|
37009 |
+
"learning_rate": 3.1038182758397207e-06,
|
37010 |
+
"loss": 0.0,
|
37011 |
+
"step": 5286
|
37012 |
+
},
|
37013 |
+
{
|
37014 |
+
"epoch": 0.8892439660247246,
|
37015 |
+
"grad_norm": NaN,
|
37016 |
+
"learning_rate": 3.094503957374456e-06,
|
37017 |
+
"loss": 0.0,
|
37018 |
+
"step": 5287
|
37019 |
+
},
|
37020 |
+
{
|
37021 |
+
"epoch": 0.8894121604574888,
|
37022 |
+
"grad_norm": NaN,
|
37023 |
+
"learning_rate": 3.0852031893816245e-06,
|
37024 |
+
"loss": 0.0,
|
37025 |
+
"step": 5288
|
37026 |
+
},
|
37027 |
+
{
|
37028 |
+
"epoch": 0.8895803548902531,
|
37029 |
+
"grad_norm": NaN,
|
37030 |
+
"learning_rate": 3.075915974548116e-06,
|
37031 |
+
"loss": 0.0,
|
37032 |
+
"step": 5289
|
37033 |
+
},
|
37034 |
+
{
|
37035 |
+
"epoch": 0.8897485493230174,
|
37036 |
+
"grad_norm": NaN,
|
37037 |
+
"learning_rate": 3.066642315556895e-06,
|
37038 |
+
"loss": 0.0,
|
37039 |
+
"step": 5290
|
37040 |
+
},
|
37041 |
+
{
|
37042 |
+
"epoch": 0.8899167437557817,
|
37043 |
+
"grad_norm": NaN,
|
37044 |
+
"learning_rate": 3.0573822150870344e-06,
|
37045 |
+
"loss": 0.0,
|
37046 |
+
"step": 5291
|
37047 |
+
},
|
37048 |
+
{
|
37049 |
+
"epoch": 0.890084938188546,
|
37050 |
+
"grad_norm": NaN,
|
37051 |
+
"learning_rate": 3.0481356758136624e-06,
|
37052 |
+
"loss": 0.0,
|
37053 |
+
"step": 5292
|
37054 |
+
},
|
37055 |
+
{
|
37056 |
+
"epoch": 0.8902531326213102,
|
37057 |
+
"grad_norm": NaN,
|
37058 |
+
"learning_rate": 3.038902700408003e-06,
|
37059 |
+
"loss": 0.0,
|
37060 |
+
"step": 5293
|
37061 |
+
},
|
37062 |
+
{
|
37063 |
+
"epoch": 0.8904213270540745,
|
37064 |
+
"grad_norm": NaN,
|
37065 |
+
"learning_rate": 3.0296832915373497e-06,
|
37066 |
+
"loss": 0.0,
|
37067 |
+
"step": 5294
|
37068 |
+
},
|
37069 |
+
{
|
37070 |
+
"epoch": 0.8905895214868388,
|
37071 |
+
"grad_norm": NaN,
|
37072 |
+
"learning_rate": 3.020477451865106e-06,
|
37073 |
+
"loss": 0.0,
|
37074 |
+
"step": 5295
|
37075 |
+
},
|
37076 |
+
{
|
37077 |
+
"epoch": 0.8907577159196031,
|
37078 |
+
"grad_norm": NaN,
|
37079 |
+
"learning_rate": 3.0112851840507215e-06,
|
37080 |
+
"loss": 0.0,
|
37081 |
+
"step": 5296
|
37082 |
+
},
|
37083 |
+
{
|
37084 |
+
"epoch": 0.8909259103523673,
|
37085 |
+
"grad_norm": NaN,
|
37086 |
+
"learning_rate": 3.002106490749751e-06,
|
37087 |
+
"loss": 0.0,
|
37088 |
+
"step": 5297
|
37089 |
+
},
|
37090 |
+
{
|
37091 |
+
"epoch": 0.8910941047851316,
|
37092 |
+
"grad_norm": NaN,
|
37093 |
+
"learning_rate": 2.992941374613806e-06,
|
37094 |
+
"loss": 0.0,
|
37095 |
+
"step": 5298
|
37096 |
+
},
|
37097 |
+
{
|
37098 |
+
"epoch": 0.8912622992178959,
|
37099 |
+
"grad_norm": NaN,
|
37100 |
+
"learning_rate": 2.9837898382905847e-06,
|
37101 |
+
"loss": 0.0,
|
37102 |
+
"step": 5299
|
37103 |
+
},
|
37104 |
+
{
|
37105 |
+
"epoch": 0.8914304936506602,
|
37106 |
+
"grad_norm": NaN,
|
37107 |
+
"learning_rate": 2.974651884423868e-06,
|
37108 |
+
"loss": 0.0,
|
37109 |
+
"step": 5300
|
37110 |
+
},
|
37111 |
+
{
|
37112 |
+
"epoch": 0.8915986880834245,
|
37113 |
+
"grad_norm": NaN,
|
37114 |
+
"learning_rate": 2.9655275156535103e-06,
|
37115 |
+
"loss": 0.0,
|
37116 |
+
"step": 5301
|
37117 |
+
},
|
37118 |
+
{
|
37119 |
+
"epoch": 0.8917668825161887,
|
37120 |
+
"grad_norm": NaN,
|
37121 |
+
"learning_rate": 2.956416734615436e-06,
|
37122 |
+
"loss": 0.0,
|
37123 |
+
"step": 5302
|
37124 |
+
},
|
37125 |
+
{
|
37126 |
+
"epoch": 0.891935076948953,
|
37127 |
+
"grad_norm": NaN,
|
37128 |
+
"learning_rate": 2.9473195439416457e-06,
|
37129 |
+
"loss": 0.0,
|
37130 |
+
"step": 5303
|
37131 |
+
},
|
37132 |
+
{
|
37133 |
+
"epoch": 0.8921032713817173,
|
37134 |
+
"grad_norm": NaN,
|
37135 |
+
"learning_rate": 2.9382359462602206e-06,
|
37136 |
+
"loss": 0.0,
|
37137 |
+
"step": 5304
|
37138 |
+
},
|
37139 |
+
{
|
37140 |
+
"epoch": 0.8922714658144816,
|
37141 |
+
"grad_norm": NaN,
|
37142 |
+
"learning_rate": 2.9291659441953114e-06,
|
37143 |
+
"loss": 0.0,
|
37144 |
+
"step": 5305
|
37145 |
+
},
|
37146 |
+
{
|
37147 |
+
"epoch": 0.8924396602472459,
|
37148 |
+
"grad_norm": NaN,
|
37149 |
+
"learning_rate": 2.920109540367133e-06,
|
37150 |
+
"loss": 0.0,
|
37151 |
+
"step": 5306
|
37152 |
+
},
|
37153 |
+
{
|
37154 |
+
"epoch": 0.8926078546800101,
|
37155 |
+
"grad_norm": NaN,
|
37156 |
+
"learning_rate": 2.911066737391982e-06,
|
37157 |
+
"loss": 0.0,
|
37158 |
+
"step": 5307
|
37159 |
+
},
|
37160 |
+
{
|
37161 |
+
"epoch": 0.8927760491127744,
|
37162 |
+
"grad_norm": NaN,
|
37163 |
+
"learning_rate": 2.9020375378822297e-06,
|
37164 |
+
"loss": 0.0,
|
37165 |
+
"step": 5308
|
37166 |
+
},
|
37167 |
+
{
|
37168 |
+
"epoch": 0.8929442435455387,
|
37169 |
+
"grad_norm": NaN,
|
37170 |
+
"learning_rate": 2.8930219444463005e-06,
|
37171 |
+
"loss": 0.0,
|
37172 |
+
"step": 5309
|
37173 |
+
},
|
37174 |
+
{
|
37175 |
+
"epoch": 0.8931124379783029,
|
37176 |
+
"grad_norm": NaN,
|
37177 |
+
"learning_rate": 2.8840199596887105e-06,
|
37178 |
+
"loss": 0.0,
|
37179 |
+
"step": 5310
|
37180 |
+
},
|
37181 |
+
{
|
37182 |
+
"epoch": 0.8932806324110671,
|
37183 |
+
"grad_norm": NaN,
|
37184 |
+
"learning_rate": 2.8750315862100142e-06,
|
37185 |
+
"loss": 0.0,
|
37186 |
+
"step": 5311
|
37187 |
+
},
|
37188 |
+
{
|
37189 |
+
"epoch": 0.8934488268438314,
|
37190 |
+
"grad_norm": NaN,
|
37191 |
+
"learning_rate": 2.8660568266068723e-06,
|
37192 |
+
"loss": 0.0,
|
37193 |
+
"step": 5312
|
37194 |
+
},
|
37195 |
+
{
|
37196 |
+
"epoch": 0.8936170212765957,
|
37197 |
+
"grad_norm": NaN,
|
37198 |
+
"learning_rate": 2.8570956834719776e-06,
|
37199 |
+
"loss": 0.0,
|
37200 |
+
"step": 5313
|
37201 |
+
},
|
37202 |
+
{
|
37203 |
+
"epoch": 0.89378521570936,
|
37204 |
+
"grad_norm": NaN,
|
37205 |
+
"learning_rate": 2.848148159394115e-06,
|
37206 |
+
"loss": 0.0,
|
37207 |
+
"step": 5314
|
37208 |
+
},
|
37209 |
+
{
|
37210 |
+
"epoch": 0.8939534101421243,
|
37211 |
+
"grad_norm": NaN,
|
37212 |
+
"learning_rate": 2.839214256958106e-06,
|
37213 |
+
"loss": 0.0,
|
37214 |
+
"step": 5315
|
37215 |
+
},
|
37216 |
+
{
|
37217 |
+
"epoch": 0.8941216045748885,
|
37218 |
+
"grad_norm": NaN,
|
37219 |
+
"learning_rate": 2.8302939787448746e-06,
|
37220 |
+
"loss": 0.0,
|
37221 |
+
"step": 5316
|
37222 |
+
},
|
37223 |
+
{
|
37224 |
+
"epoch": 0.8942897990076528,
|
37225 |
+
"grad_norm": NaN,
|
37226 |
+
"learning_rate": 2.8213873273313873e-06,
|
37227 |
+
"loss": 0.0,
|
37228 |
+
"step": 5317
|
37229 |
+
},
|
37230 |
+
{
|
37231 |
+
"epoch": 0.8944579934404171,
|
37232 |
+
"grad_norm": NaN,
|
37233 |
+
"learning_rate": 2.8124943052906638e-06,
|
37234 |
+
"loss": 0.0,
|
37235 |
+
"step": 5318
|
37236 |
+
},
|
37237 |
+
{
|
37238 |
+
"epoch": 0.8946261878731814,
|
37239 |
+
"grad_norm": NaN,
|
37240 |
+
"learning_rate": 2.80361491519181e-06,
|
37241 |
+
"loss": 0.0,
|
37242 |
+
"step": 5319
|
37243 |
+
},
|
37244 |
+
{
|
37245 |
+
"epoch": 0.8947943823059457,
|
37246 |
+
"grad_norm": NaN,
|
37247 |
+
"learning_rate": 2.794749159599974e-06,
|
37248 |
+
"loss": 0.0,
|
37249 |
+
"step": 5320
|
37250 |
+
},
|
37251 |
+
{
|
37252 |
+
"epoch": 0.8949625767387099,
|
37253 |
+
"grad_norm": NaN,
|
37254 |
+
"learning_rate": 2.7858970410763795e-06,
|
37255 |
+
"loss": 0.0,
|
37256 |
+
"step": 5321
|
37257 |
+
},
|
37258 |
+
{
|
37259 |
+
"epoch": 0.8951307711714742,
|
37260 |
+
"grad_norm": NaN,
|
37261 |
+
"learning_rate": 2.7770585621782973e-06,
|
37262 |
+
"loss": 0.0,
|
37263 |
+
"step": 5322
|
37264 |
+
},
|
37265 |
+
{
|
37266 |
+
"epoch": 0.8952989656042385,
|
37267 |
+
"grad_norm": NaN,
|
37268 |
+
"learning_rate": 2.7682337254590684e-06,
|
37269 |
+
"loss": 0.0,
|
37270 |
+
"step": 5323
|
37271 |
+
},
|
37272 |
+
{
|
37273 |
+
"epoch": 0.8954671600370028,
|
37274 |
+
"grad_norm": NaN,
|
37275 |
+
"learning_rate": 2.759422533468092e-06,
|
37276 |
+
"loss": 0.0,
|
37277 |
+
"step": 5324
|
37278 |
+
},
|
37279 |
+
{
|
37280 |
+
"epoch": 0.895635354469767,
|
37281 |
+
"grad_norm": NaN,
|
37282 |
+
"learning_rate": 2.750624988750822e-06,
|
37283 |
+
"loss": 0.0,
|
37284 |
+
"step": 5325
|
37285 |
+
},
|
37286 |
+
{
|
37287 |
+
"epoch": 0.8958035489025313,
|
37288 |
+
"grad_norm": NaN,
|
37289 |
+
"learning_rate": 2.7418410938487736e-06,
|
37290 |
+
"loss": 0.0,
|
37291 |
+
"step": 5326
|
37292 |
+
},
|
37293 |
+
{
|
37294 |
+
"epoch": 0.8959717433352956,
|
37295 |
+
"grad_norm": NaN,
|
37296 |
+
"learning_rate": 2.7330708512994964e-06,
|
37297 |
+
"loss": 0.0,
|
37298 |
+
"step": 5327
|
37299 |
+
},
|
37300 |
+
{
|
37301 |
+
"epoch": 0.8961399377680599,
|
37302 |
+
"grad_norm": NaN,
|
37303 |
+
"learning_rate": 2.7243142636366457e-06,
|
37304 |
+
"loss": 0.0,
|
37305 |
+
"step": 5328
|
37306 |
+
},
|
37307 |
+
{
|
37308 |
+
"epoch": 0.8963081322008242,
|
37309 |
+
"grad_norm": NaN,
|
37310 |
+
"learning_rate": 2.7155713333898825e-06,
|
37311 |
+
"loss": 0.0,
|
37312 |
+
"step": 5329
|
37313 |
+
},
|
37314 |
+
{
|
37315 |
+
"epoch": 0.8964763266335884,
|
37316 |
+
"grad_norm": NaN,
|
37317 |
+
"learning_rate": 2.706842063084941e-06,
|
37318 |
+
"loss": 0.0,
|
37319 |
+
"step": 5330
|
37320 |
+
},
|
37321 |
+
{
|
37322 |
+
"epoch": 0.8966445210663527,
|
37323 |
+
"grad_norm": NaN,
|
37324 |
+
"learning_rate": 2.6981264552436105e-06,
|
37325 |
+
"loss": 0.0,
|
37326 |
+
"step": 5331
|
37327 |
+
},
|
37328 |
+
{
|
37329 |
+
"epoch": 0.896812715499117,
|
37330 |
+
"grad_norm": NaN,
|
37331 |
+
"learning_rate": 2.689424512383748e-06,
|
37332 |
+
"loss": 0.0,
|
37333 |
+
"step": 5332
|
37334 |
+
},
|
37335 |
+
{
|
37336 |
+
"epoch": 0.8969809099318813,
|
37337 |
+
"grad_norm": NaN,
|
37338 |
+
"learning_rate": 2.680736237019227e-06,
|
37339 |
+
"loss": 0.0,
|
37340 |
+
"step": 5333
|
37341 |
+
},
|
37342 |
+
{
|
37343 |
+
"epoch": 0.8971491043646456,
|
37344 |
+
"grad_norm": NaN,
|
37345 |
+
"learning_rate": 2.6720616316600056e-06,
|
37346 |
+
"loss": 0.0,
|
37347 |
+
"step": 5334
|
37348 |
+
},
|
37349 |
+
{
|
37350 |
+
"epoch": 0.8973172987974098,
|
37351 |
+
"grad_norm": NaN,
|
37352 |
+
"learning_rate": 2.663400698812074e-06,
|
37353 |
+
"loss": 0.0,
|
37354 |
+
"step": 5335
|
37355 |
+
},
|
37356 |
+
{
|
37357 |
+
"epoch": 0.8974854932301741,
|
37358 |
+
"grad_norm": NaN,
|
37359 |
+
"learning_rate": 2.654753440977481e-06,
|
37360 |
+
"loss": 0.0,
|
37361 |
+
"step": 5336
|
37362 |
+
},
|
37363 |
+
{
|
37364 |
+
"epoch": 0.8976536876629384,
|
37365 |
+
"grad_norm": NaN,
|
37366 |
+
"learning_rate": 2.646119860654317e-06,
|
37367 |
+
"loss": 0.0,
|
37368 |
+
"step": 5337
|
37369 |
+
},
|
37370 |
+
{
|
37371 |
+
"epoch": 0.8978218820957027,
|
37372 |
+
"grad_norm": NaN,
|
37373 |
+
"learning_rate": 2.6374999603367367e-06,
|
37374 |
+
"loss": 0.0,
|
37375 |
+
"step": 5338
|
37376 |
+
},
|
37377 |
+
{
|
37378 |
+
"epoch": 0.897990076528467,
|
37379 |
+
"grad_norm": NaN,
|
37380 |
+
"learning_rate": 2.6288937425149205e-06,
|
37381 |
+
"loss": 0.0,
|
37382 |
+
"step": 5339
|
37383 |
+
},
|
37384 |
+
{
|
37385 |
+
"epoch": 0.8981582709612311,
|
37386 |
+
"grad_norm": NaN,
|
37387 |
+
"learning_rate": 2.620301209675119e-06,
|
37388 |
+
"loss": 0.0,
|
37389 |
+
"step": 5340
|
37390 |
+
},
|
37391 |
+
{
|
37392 |
+
"epoch": 0.8983264653939954,
|
37393 |
+
"grad_norm": NaN,
|
37394 |
+
"learning_rate": 2.611722364299618e-06,
|
37395 |
+
"loss": 0.0,
|
37396 |
+
"step": 5341
|
37397 |
+
},
|
37398 |
+
{
|
37399 |
+
"epoch": 0.8984946598267597,
|
37400 |
+
"grad_norm": NaN,
|
37401 |
+
"learning_rate": 2.6031572088667465e-06,
|
37402 |
+
"loss": 0.0,
|
37403 |
+
"step": 5342
|
37404 |
+
},
|
37405 |
+
{
|
37406 |
+
"epoch": 0.898662854259524,
|
37407 |
+
"grad_norm": NaN,
|
37408 |
+
"learning_rate": 2.5946057458508756e-06,
|
37409 |
+
"loss": 0.0,
|
37410 |
+
"step": 5343
|
37411 |
+
},
|
37412 |
+
{
|
37413 |
+
"epoch": 0.8988310486922882,
|
37414 |
+
"grad_norm": NaN,
|
37415 |
+
"learning_rate": 2.5860679777224394e-06,
|
37416 |
+
"loss": 0.0,
|
37417 |
+
"step": 5344
|
37418 |
+
},
|
37419 |
+
{
|
37420 |
+
"epoch": 0.8989992431250525,
|
37421 |
+
"grad_norm": NaN,
|
37422 |
+
"learning_rate": 2.577543906947899e-06,
|
37423 |
+
"loss": 0.0,
|
37424 |
+
"step": 5345
|
37425 |
+
},
|
37426 |
+
{
|
37427 |
+
"epoch": 0.8991674375578168,
|
37428 |
+
"grad_norm": NaN,
|
37429 |
+
"learning_rate": 2.5690335359897564e-06,
|
37430 |
+
"loss": 0.0,
|
37431 |
+
"step": 5346
|
37432 |
+
},
|
37433 |
+
{
|
37434 |
+
"epoch": 0.8993356319905811,
|
37435 |
+
"grad_norm": NaN,
|
37436 |
+
"learning_rate": 2.5605368673065733e-06,
|
37437 |
+
"loss": 0.0,
|
37438 |
+
"step": 5347
|
37439 |
+
},
|
37440 |
+
{
|
37441 |
+
"epoch": 0.8995038264233454,
|
37442 |
+
"grad_norm": NaN,
|
37443 |
+
"learning_rate": 2.55205390335293e-06,
|
37444 |
+
"loss": 0.0,
|
37445 |
+
"step": 5348
|
37446 |
+
},
|
37447 |
+
{
|
37448 |
+
"epoch": 0.8996720208561096,
|
37449 |
+
"grad_norm": NaN,
|
37450 |
+
"learning_rate": 2.5435846465794723e-06,
|
37451 |
+
"loss": 0.0,
|
37452 |
+
"step": 5349
|
37453 |
+
},
|
37454 |
+
{
|
37455 |
+
"epoch": 0.8998402152888739,
|
37456 |
+
"grad_norm": NaN,
|
37457 |
+
"learning_rate": 2.53512909943287e-06,
|
37458 |
+
"loss": 0.0,
|
37459 |
+
"step": 5350
|
37460 |
+
},
|
37461 |
+
{
|
37462 |
+
"epoch": 0.9000084097216382,
|
37463 |
+
"grad_norm": NaN,
|
37464 |
+
"learning_rate": 2.5266872643558316e-06,
|
37465 |
+
"loss": 0.0,
|
37466 |
+
"step": 5351
|
37467 |
+
},
|
37468 |
+
{
|
37469 |
+
"epoch": 0.9001766041544025,
|
37470 |
+
"grad_norm": NaN,
|
37471 |
+
"learning_rate": 2.518259143787105e-06,
|
37472 |
+
"loss": 0.0,
|
37473 |
+
"step": 5352
|
37474 |
+
},
|
37475 |
+
{
|
37476 |
+
"epoch": 0.9003447985871668,
|
37477 |
+
"grad_norm": NaN,
|
37478 |
+
"learning_rate": 2.5098447401614934e-06,
|
37479 |
+
"loss": 0.0,
|
37480 |
+
"step": 5353
|
37481 |
+
},
|
37482 |
+
{
|
37483 |
+
"epoch": 0.900512993019931,
|
37484 |
+
"grad_norm": NaN,
|
37485 |
+
"learning_rate": 2.501444055909813e-06,
|
37486 |
+
"loss": 0.0,
|
37487 |
+
"step": 5354
|
37488 |
+
},
|
37489 |
+
{
|
37490 |
+
"epoch": 0.9006811874526953,
|
37491 |
+
"grad_norm": NaN,
|
37492 |
+
"learning_rate": 2.493057093458934e-06,
|
37493 |
+
"loss": 0.0,
|
37494 |
+
"step": 5355
|
37495 |
+
},
|
37496 |
+
{
|
37497 |
+
"epoch": 0.9008493818854596,
|
37498 |
+
"grad_norm": NaN,
|
37499 |
+
"learning_rate": 2.484683855231751e-06,
|
37500 |
+
"loss": 0.0,
|
37501 |
+
"step": 5356
|
37502 |
+
},
|
37503 |
+
{
|
37504 |
+
"epoch": 0.9010175763182239,
|
37505 |
+
"grad_norm": NaN,
|
37506 |
+
"learning_rate": 2.4763243436472016e-06,
|
37507 |
+
"loss": 0.0,
|
37508 |
+
"step": 5357
|
37509 |
+
},
|
37510 |
+
{
|
37511 |
+
"epoch": 0.9011857707509882,
|
37512 |
+
"grad_norm": NaN,
|
37513 |
+
"learning_rate": 2.467978561120249e-06,
|
37514 |
+
"loss": 0.0,
|
37515 |
+
"step": 5358
|
37516 |
+
},
|
37517 |
+
{
|
37518 |
+
"epoch": 0.9013539651837524,
|
37519 |
+
"grad_norm": NaN,
|
37520 |
+
"learning_rate": 2.459646510061908e-06,
|
37521 |
+
"loss": 0.0,
|
37522 |
+
"step": 5359
|
37523 |
+
},
|
37524 |
+
{
|
37525 |
+
"epoch": 0.9015221596165167,
|
37526 |
+
"grad_norm": NaN,
|
37527 |
+
"learning_rate": 2.4513281928791985e-06,
|
37528 |
+
"loss": 0.0,
|
37529 |
+
"step": 5360
|
37530 |
+
},
|
37531 |
+
{
|
37532 |
+
"epoch": 0.901690354049281,
|
37533 |
+
"grad_norm": NaN,
|
37534 |
+
"learning_rate": 2.443023611975204e-06,
|
37535 |
+
"loss": 0.0,
|
37536 |
+
"step": 5361
|
37537 |
+
},
|
37538 |
+
{
|
37539 |
+
"epoch": 0.9018585484820453,
|
37540 |
+
"grad_norm": NaN,
|
37541 |
+
"learning_rate": 2.434732769749015e-06,
|
37542 |
+
"loss": 0.0,
|
37543 |
+
"step": 5362
|
37544 |
+
},
|
37545 |
+
{
|
37546 |
+
"epoch": 0.9020267429148096,
|
37547 |
+
"grad_norm": NaN,
|
37548 |
+
"learning_rate": 2.426455668595773e-06,
|
37549 |
+
"loss": 0.0,
|
37550 |
+
"step": 5363
|
37551 |
+
},
|
37552 |
+
{
|
37553 |
+
"epoch": 0.9021949373475738,
|
37554 |
+
"grad_norm": NaN,
|
37555 |
+
"learning_rate": 2.418192310906625e-06,
|
37556 |
+
"loss": 0.0,
|
37557 |
+
"step": 5364
|
37558 |
+
},
|
37559 |
+
{
|
37560 |
+
"epoch": 0.9023631317803381,
|
37561 |
+
"grad_norm": NaN,
|
37562 |
+
"learning_rate": 2.409942699068779e-06,
|
37563 |
+
"loss": 0.0,
|
37564 |
+
"step": 5365
|
37565 |
+
},
|
37566 |
+
{
|
37567 |
+
"epoch": 0.9025313262131024,
|
37568 |
+
"grad_norm": NaN,
|
37569 |
+
"learning_rate": 2.4017068354654503e-06,
|
37570 |
+
"loss": 0.0,
|
37571 |
+
"step": 5366
|
37572 |
+
},
|
37573 |
+
{
|
37574 |
+
"epoch": 0.9026995206458667,
|
37575 |
+
"grad_norm": NaN,
|
37576 |
+
"learning_rate": 2.3934847224758804e-06,
|
37577 |
+
"loss": 0.0,
|
37578 |
+
"step": 5367
|
37579 |
+
},
|
37580 |
+
{
|
37581 |
+
"epoch": 0.902867715078631,
|
37582 |
+
"grad_norm": NaN,
|
37583 |
+
"learning_rate": 2.385276362475347e-06,
|
37584 |
+
"loss": 0.0,
|
37585 |
+
"step": 5368
|
37586 |
+
},
|
37587 |
+
{
|
37588 |
+
"epoch": 0.9030359095113951,
|
37589 |
+
"grad_norm": NaN,
|
37590 |
+
"learning_rate": 2.3770817578351646e-06,
|
37591 |
+
"loss": 0.0,
|
37592 |
+
"step": 5369
|
37593 |
+
},
|
37594 |
+
{
|
37595 |
+
"epoch": 0.9032041039441594,
|
37596 |
+
"grad_norm": NaN,
|
37597 |
+
"learning_rate": 2.3689009109226556e-06,
|
37598 |
+
"loss": 0.0,
|
37599 |
+
"step": 5370
|
37600 |
+
},
|
37601 |
+
{
|
37602 |
+
"epoch": 0.9033722983769237,
|
37603 |
+
"grad_norm": NaN,
|
37604 |
+
"learning_rate": 2.3607338241011747e-06,
|
37605 |
+
"loss": 0.0,
|
37606 |
+
"step": 5371
|
37607 |
+
},
|
37608 |
+
{
|
37609 |
+
"epoch": 0.903540492809688,
|
37610 |
+
"grad_norm": NaN,
|
37611 |
+
"learning_rate": 2.3525804997300893e-06,
|
37612 |
+
"loss": 0.0,
|
37613 |
+
"step": 5372
|
37614 |
+
},
|
37615 |
+
{
|
37616 |
+
"epoch": 0.9037086872424522,
|
37617 |
+
"grad_norm": NaN,
|
37618 |
+
"learning_rate": 2.344440940164827e-06,
|
37619 |
+
"loss": 0.0,
|
37620 |
+
"step": 5373
|
37621 |
+
},
|
37622 |
+
{
|
37623 |
+
"epoch": 0.9038768816752165,
|
37624 |
+
"grad_norm": NaN,
|
37625 |
+
"learning_rate": 2.336315147756807e-06,
|
37626 |
+
"loss": 0.0,
|
37627 |
+
"step": 5374
|
37628 |
+
},
|
37629 |
+
{
|
37630 |
+
"epoch": 0.9040450761079808,
|
37631 |
+
"grad_norm": NaN,
|
37632 |
+
"learning_rate": 2.328203124853473e-06,
|
37633 |
+
"loss": 0.0,
|
37634 |
+
"step": 5375
|
37635 |
+
},
|
37636 |
+
{
|
37637 |
+
"epoch": 0.9042132705407451,
|
37638 |
+
"grad_norm": NaN,
|
37639 |
+
"learning_rate": 2.3201048737983013e-06,
|
37640 |
+
"loss": 0.0,
|
37641 |
+
"step": 5376
|
37642 |
+
},
|
37643 |
+
{
|
37644 |
+
"epoch": 0.9043814649735094,
|
37645 |
+
"grad_norm": NaN,
|
37646 |
+
"learning_rate": 2.3120203969307862e-06,
|
37647 |
+
"loss": 0.0,
|
37648 |
+
"step": 5377
|
37649 |
+
},
|
37650 |
+
{
|
37651 |
+
"epoch": 0.9045496594062736,
|
37652 |
+
"grad_norm": NaN,
|
37653 |
+
"learning_rate": 2.3039496965864436e-06,
|
37654 |
+
"loss": 0.0,
|
37655 |
+
"step": 5378
|
37656 |
+
},
|
37657 |
+
{
|
37658 |
+
"epoch": 0.9047178538390379,
|
37659 |
+
"grad_norm": NaN,
|
37660 |
+
"learning_rate": 2.2958927750968083e-06,
|
37661 |
+
"loss": 0.0,
|
37662 |
+
"step": 5379
|
37663 |
+
},
|
37664 |
+
{
|
37665 |
+
"epoch": 0.9048860482718022,
|
37666 |
+
"grad_norm": NaN,
|
37667 |
+
"learning_rate": 2.287849634789424e-06,
|
37668 |
+
"loss": 0.0,
|
37669 |
+
"step": 5380
|
37670 |
+
},
|
37671 |
+
{
|
37672 |
+
"epoch": 0.9050542427045665,
|
37673 |
+
"grad_norm": NaN,
|
37674 |
+
"learning_rate": 2.2798202779878818e-06,
|
37675 |
+
"loss": 0.0,
|
37676 |
+
"step": 5381
|
37677 |
+
},
|
37678 |
+
{
|
37679 |
+
"epoch": 0.9052224371373307,
|
37680 |
+
"grad_norm": NaN,
|
37681 |
+
"learning_rate": 2.2718047070117655e-06,
|
37682 |
+
"loss": 0.0,
|
37683 |
+
"step": 5382
|
37684 |
+
},
|
37685 |
+
{
|
37686 |
+
"epoch": 0.905390631570095,
|
37687 |
+
"grad_norm": NaN,
|
37688 |
+
"learning_rate": 2.2638029241766833e-06,
|
37689 |
+
"loss": 0.0,
|
37690 |
+
"step": 5383
|
37691 |
+
},
|
37692 |
+
{
|
37693 |
+
"epoch": 0.9055588260028593,
|
37694 |
+
"grad_norm": NaN,
|
37695 |
+
"learning_rate": 2.2558149317942536e-06,
|
37696 |
+
"loss": 0.0,
|
37697 |
+
"step": 5384
|
37698 |
+
},
|
37699 |
+
{
|
37700 |
+
"epoch": 0.9057270204356236,
|
37701 |
+
"grad_norm": NaN,
|
37702 |
+
"learning_rate": 2.2478407321721296e-06,
|
37703 |
+
"loss": 0.0,
|
37704 |
+
"step": 5385
|
37705 |
+
},
|
37706 |
+
{
|
37707 |
+
"epoch": 0.9058952148683879,
|
37708 |
+
"grad_norm": NaN,
|
37709 |
+
"learning_rate": 2.2398803276139636e-06,
|
37710 |
+
"loss": 0.0,
|
37711 |
+
"step": 5386
|
37712 |
+
},
|
37713 |
+
{
|
37714 |
+
"epoch": 0.9060634093011521,
|
37715 |
+
"grad_norm": NaN,
|
37716 |
+
"learning_rate": 2.2319337204194267e-06,
|
37717 |
+
"loss": 0.0,
|
37718 |
+
"step": 5387
|
37719 |
+
},
|
37720 |
+
{
|
37721 |
+
"epoch": 0.9062316037339164,
|
37722 |
+
"grad_norm": NaN,
|
37723 |
+
"learning_rate": 2.2240009128842e-06,
|
37724 |
+
"loss": 0.0,
|
37725 |
+
"step": 5388
|
37726 |
+
},
|
37727 |
+
{
|
37728 |
+
"epoch": 0.9063997981666807,
|
37729 |
+
"grad_norm": NaN,
|
37730 |
+
"learning_rate": 2.2160819072999885e-06,
|
37731 |
+
"loss": 0.0,
|
37732 |
+
"step": 5389
|
37733 |
+
},
|
37734 |
+
{
|
37735 |
+
"epoch": 0.906567992599445,
|
37736 |
+
"grad_norm": NaN,
|
37737 |
+
"learning_rate": 2.208176705954512e-06,
|
37738 |
+
"loss": 0.0,
|
37739 |
+
"step": 5390
|
37740 |
+
},
|
37741 |
+
{
|
37742 |
+
"epoch": 0.9067361870322093,
|
37743 |
+
"grad_norm": NaN,
|
37744 |
+
"learning_rate": 2.2002853111314783e-06,
|
37745 |
+
"loss": 0.0,
|
37746 |
+
"step": 5391
|
37747 |
+
},
|
37748 |
+
{
|
37749 |
+
"epoch": 0.9069043814649735,
|
37750 |
+
"grad_norm": NaN,
|
37751 |
+
"learning_rate": 2.1924077251106347e-06,
|
37752 |
+
"loss": 0.0,
|
37753 |
+
"step": 5392
|
37754 |
+
},
|
37755 |
+
{
|
37756 |
+
"epoch": 0.9070725758977378,
|
37757 |
+
"grad_norm": NaN,
|
37758 |
+
"learning_rate": 2.1845439501677222e-06,
|
37759 |
+
"loss": 0.0,
|
37760 |
+
"step": 5393
|
37761 |
+
},
|
37762 |
+
{
|
37763 |
+
"epoch": 0.9072407703305021,
|
37764 |
+
"grad_norm": NaN,
|
37765 |
+
"learning_rate": 2.176693988574502e-06,
|
37766 |
+
"loss": 0.0,
|
37767 |
+
"step": 5394
|
37768 |
+
},
|
37769 |
+
{
|
37770 |
+
"epoch": 0.9074089647632664,
|
37771 |
+
"grad_norm": NaN,
|
37772 |
+
"learning_rate": 2.168857842598737e-06,
|
37773 |
+
"loss": 0.0,
|
37774 |
+
"step": 5395
|
37775 |
+
},
|
37776 |
+
{
|
37777 |
+
"epoch": 0.9075771591960307,
|
37778 |
+
"grad_norm": NaN,
|
37779 |
+
"learning_rate": 2.161035514504195e-06,
|
37780 |
+
"loss": 0.0,
|
37781 |
+
"step": 5396
|
37782 |
+
},
|
37783 |
+
{
|
37784 |
+
"epoch": 0.9077453536287949,
|
37785 |
+
"grad_norm": NaN,
|
37786 |
+
"learning_rate": 2.1532270065506675e-06,
|
37787 |
+
"loss": 0.0,
|
37788 |
+
"step": 5397
|
37789 |
+
},
|
37790 |
+
{
|
37791 |
+
"epoch": 0.9079135480615592,
|
37792 |
+
"grad_norm": NaN,
|
37793 |
+
"learning_rate": 2.1454323209939455e-06,
|
37794 |
+
"loss": 0.0,
|
37795 |
+
"step": 5398
|
37796 |
+
},
|
37797 |
+
{
|
37798 |
+
"epoch": 0.9080817424943234,
|
37799 |
+
"grad_norm": NaN,
|
37800 |
+
"learning_rate": 2.137651460085821e-06,
|
37801 |
+
"loss": 0.0,
|
37802 |
+
"step": 5399
|
37803 |
+
},
|
37804 |
+
{
|
37805 |
+
"epoch": 0.9082499369270877,
|
37806 |
+
"grad_norm": NaN,
|
37807 |
+
"learning_rate": 2.129884426074108e-06,
|
37808 |
+
"loss": 0.0,
|
37809 |
+
"step": 5400
|
37810 |
+
},
|
37811 |
+
{
|
37812 |
+
"epoch": 0.9084181313598519,
|
37813 |
+
"grad_norm": NaN,
|
37814 |
+
"learning_rate": 2.1221312212025947e-06,
|
37815 |
+
"loss": 0.0,
|
37816 |
+
"step": 5401
|
37817 |
+
},
|
37818 |
+
{
|
37819 |
+
"epoch": 0.9085863257926162,
|
37820 |
+
"grad_norm": NaN,
|
37821 |
+
"learning_rate": 2.114391847711117e-06,
|
37822 |
+
"loss": 0.0,
|
37823 |
+
"step": 5402
|
37824 |
+
},
|
37825 |
+
{
|
37826 |
+
"epoch": 0.9087545202253805,
|
37827 |
+
"grad_norm": NaN,
|
37828 |
+
"learning_rate": 2.1066663078354866e-06,
|
37829 |
+
"loss": 0.0,
|
37830 |
+
"step": 5403
|
37831 |
+
},
|
37832 |
+
{
|
37833 |
+
"epoch": 0.9089227146581448,
|
37834 |
+
"grad_norm": NaN,
|
37835 |
+
"learning_rate": 2.0989546038075234e-06,
|
37836 |
+
"loss": 0.0,
|
37837 |
+
"step": 5404
|
37838 |
+
},
|
37839 |
+
{
|
37840 |
+
"epoch": 0.9090909090909091,
|
37841 |
+
"grad_norm": NaN,
|
37842 |
+
"learning_rate": 2.091256737855046e-06,
|
37843 |
+
"loss": 0.0,
|
37844 |
+
"step": 5405
|
37845 |
+
},
|
37846 |
+
{
|
37847 |
+
"epoch": 0.9092591035236733,
|
37848 |
+
"grad_norm": NaN,
|
37849 |
+
"learning_rate": 2.083572712201898e-06,
|
37850 |
+
"loss": 0.0,
|
37851 |
+
"step": 5406
|
37852 |
+
},
|
37853 |
+
{
|
37854 |
+
"epoch": 0.9094272979564376,
|
37855 |
+
"grad_norm": NaN,
|
37856 |
+
"learning_rate": 2.075902529067897e-06,
|
37857 |
+
"loss": 0.0,
|
37858 |
+
"step": 5407
|
37859 |
+
},
|
37860 |
+
{
|
37861 |
+
"epoch": 0.9095954923892019,
|
37862 |
+
"grad_norm": NaN,
|
37863 |
+
"learning_rate": 2.068246190668871e-06,
|
37864 |
+
"loss": 0.0,
|
37865 |
+
"step": 5408
|
37866 |
+
},
|
37867 |
+
{
|
37868 |
+
"epoch": 0.9097636868219662,
|
37869 |
+
"grad_norm": NaN,
|
37870 |
+
"learning_rate": 2.060603699216651e-06,
|
37871 |
+
"loss": 0.0,
|
37872 |
+
"step": 5409
|
37873 |
+
},
|
37874 |
+
{
|
37875 |
+
"epoch": 0.9099318812547305,
|
37876 |
+
"grad_norm": NaN,
|
37877 |
+
"learning_rate": 2.0529750569190763e-06,
|
37878 |
+
"loss": 0.0,
|
37879 |
+
"step": 5410
|
37880 |
+
},
|
37881 |
+
{
|
37882 |
+
"epoch": 0.9101000756874947,
|
37883 |
+
"grad_norm": NaN,
|
37884 |
+
"learning_rate": 2.0453602659799677e-06,
|
37885 |
+
"loss": 0.0,
|
37886 |
+
"step": 5411
|
37887 |
+
},
|
37888 |
+
{
|
37889 |
+
"epoch": 0.910268270120259,
|
37890 |
+
"grad_norm": NaN,
|
37891 |
+
"learning_rate": 2.0377593285991594e-06,
|
37892 |
+
"loss": 0.0,
|
37893 |
+
"step": 5412
|
37894 |
+
},
|
37895 |
+
{
|
37896 |
+
"epoch": 0.9104364645530233,
|
37897 |
+
"grad_norm": NaN,
|
37898 |
+
"learning_rate": 2.0301722469724726e-06,
|
37899 |
+
"loss": 0.0,
|
37900 |
+
"step": 5413
|
37901 |
+
},
|
37902 |
+
{
|
37903 |
+
"epoch": 0.9106046589857876,
|
37904 |
+
"grad_norm": NaN,
|
37905 |
+
"learning_rate": 2.022599023291727e-06,
|
37906 |
+
"loss": 0.0,
|
37907 |
+
"step": 5414
|
37908 |
+
},
|
37909 |
+
{
|
37910 |
+
"epoch": 0.9107728534185519,
|
37911 |
+
"grad_norm": NaN,
|
37912 |
+
"learning_rate": 2.0150396597447496e-06,
|
37913 |
+
"loss": 0.0,
|
37914 |
+
"step": 5415
|
37915 |
+
},
|
37916 |
+
{
|
37917 |
+
"epoch": 0.9109410478513161,
|
37918 |
+
"grad_norm": NaN,
|
37919 |
+
"learning_rate": 2.0074941585153497e-06,
|
37920 |
+
"loss": 0.0,
|
37921 |
+
"step": 5416
|
37922 |
+
},
|
37923 |
+
{
|
37924 |
+
"epoch": 0.9111092422840804,
|
37925 |
+
"grad_norm": NaN,
|
37926 |
+
"learning_rate": 1.9999625217833384e-06,
|
37927 |
+
"loss": 0.0,
|
37928 |
+
"step": 5417
|
37929 |
+
},
|
37930 |
+
{
|
37931 |
+
"epoch": 0.9112774367168447,
|
37932 |
+
"grad_norm": NaN,
|
37933 |
+
"learning_rate": 1.992444751724526e-06,
|
37934 |
+
"loss": 0.0,
|
37935 |
+
"step": 5418
|
37936 |
+
},
|
37937 |
+
{
|
37938 |
+
"epoch": 0.911445631149609,
|
37939 |
+
"grad_norm": NaN,
|
37940 |
+
"learning_rate": 1.984940850510708e-06,
|
37941 |
+
"loss": 0.0,
|
37942 |
+
"step": 5419
|
37943 |
+
},
|
37944 |
+
{
|
37945 |
+
"epoch": 0.9116138255823732,
|
37946 |
+
"grad_norm": NaN,
|
37947 |
+
"learning_rate": 1.977450820309684e-06,
|
37948 |
+
"loss": 0.0,
|
37949 |
+
"step": 5420
|
37950 |
+
},
|
37951 |
+
{
|
37952 |
+
"epoch": 0.9117820200151375,
|
37953 |
+
"grad_norm": NaN,
|
37954 |
+
"learning_rate": 1.9699746632852234e-06,
|
37955 |
+
"loss": 0.0,
|
37956 |
+
"step": 5421
|
37957 |
+
},
|
37958 |
+
{
|
37959 |
+
"epoch": 0.9119502144479018,
|
37960 |
+
"grad_norm": NaN,
|
37961 |
+
"learning_rate": 1.9625123815971203e-06,
|
37962 |
+
"loss": 0.0,
|
37963 |
+
"step": 5422
|
37964 |
+
},
|
37965 |
+
{
|
37966 |
+
"epoch": 0.9121184088806661,
|
37967 |
+
"grad_norm": NaN,
|
37968 |
+
"learning_rate": 1.955063977401145e-06,
|
37969 |
+
"loss": 0.0,
|
37970 |
+
"step": 5423
|
37971 |
+
},
|
37972 |
+
{
|
37973 |
+
"epoch": 0.9122866033134304,
|
37974 |
+
"grad_norm": NaN,
|
37975 |
+
"learning_rate": 1.947629452849048e-06,
|
37976 |
+
"loss": 0.0,
|
37977 |
+
"step": 5424
|
37978 |
+
},
|
37979 |
+
{
|
37980 |
+
"epoch": 0.9124547977461946,
|
37981 |
+
"grad_norm": NaN,
|
37982 |
+
"learning_rate": 1.940208810088584e-06,
|
37983 |
+
"loss": 0.0,
|
37984 |
+
"step": 5425
|
37985 |
+
},
|
37986 |
+
{
|
37987 |
+
"epoch": 0.9126229921789589,
|
37988 |
+
"grad_norm": NaN,
|
37989 |
+
"learning_rate": 1.9328020512634936e-06,
|
37990 |
+
"loss": 0.0,
|
37991 |
+
"step": 5426
|
37992 |
+
},
|
37993 |
+
{
|
37994 |
+
"epoch": 0.9127911866117232,
|
37995 |
+
"grad_norm": NaN,
|
37996 |
+
"learning_rate": 1.9254091785135153e-06,
|
37997 |
+
"loss": 0.0,
|
37998 |
+
"step": 5427
|
37999 |
+
},
|
38000 |
+
{
|
38001 |
+
"epoch": 0.9129593810444874,
|
38002 |
+
"grad_norm": NaN,
|
38003 |
+
"learning_rate": 1.9180301939743516e-06,
|
38004 |
+
"loss": 0.0,
|
38005 |
+
"step": 5428
|
38006 |
+
},
|
38007 |
+
{
|
38008 |
+
"epoch": 0.9131275754772517,
|
38009 |
+
"grad_norm": NaN,
|
38010 |
+
"learning_rate": 1.9106650997777197e-06,
|
38011 |
+
"loss": 0.0,
|
38012 |
+
"step": 5429
|
38013 |
+
},
|
38014 |
+
{
|
38015 |
+
"epoch": 0.9132957699100159,
|
38016 |
+
"grad_norm": NaN,
|
38017 |
+
"learning_rate": 1.9033138980513066e-06,
|
38018 |
+
"loss": 0.0,
|
38019 |
+
"step": 5430
|
38020 |
+
},
|
38021 |
+
{
|
38022 |
+
"epoch": 0.9134639643427802,
|
38023 |
+
"grad_norm": NaN,
|
38024 |
+
"learning_rate": 1.8959765909187965e-06,
|
38025 |
+
"loss": 0.0,
|
38026 |
+
"step": 5431
|
38027 |
+
},
|
38028 |
+
{
|
38029 |
+
"epoch": 0.9136321587755445,
|
38030 |
+
"grad_norm": NaN,
|
38031 |
+
"learning_rate": 1.8886531804998553e-06,
|
38032 |
+
"loss": 0.0,
|
38033 |
+
"step": 5432
|
38034 |
+
},
|
38035 |
+
{
|
38036 |
+
"epoch": 0.9138003532083088,
|
38037 |
+
"grad_norm": NaN,
|
38038 |
+
"learning_rate": 1.8813436689101239e-06,
|
38039 |
+
"loss": 0.0,
|
38040 |
+
"step": 5433
|
38041 |
+
},
|
38042 |
+
{
|
38043 |
+
"epoch": 0.913968547641073,
|
38044 |
+
"grad_norm": NaN,
|
38045 |
+
"learning_rate": 1.8740480582612519e-06,
|
38046 |
+
"loss": 0.0,
|
38047 |
+
"step": 5434
|
38048 |
+
},
|
38049 |
+
{
|
38050 |
+
"epoch": 0.9141367420738373,
|
38051 |
+
"grad_norm": NaN,
|
38052 |
+
"learning_rate": 1.8667663506608534e-06,
|
38053 |
+
"loss": 0.0,
|
38054 |
+
"step": 5435
|
38055 |
+
},
|
38056 |
+
{
|
38057 |
+
"epoch": 0.9143049365066016,
|
38058 |
+
"grad_norm": NaN,
|
38059 |
+
"learning_rate": 1.859498548212535e-06,
|
38060 |
+
"loss": 0.0,
|
38061 |
+
"step": 5436
|
38062 |
+
},
|
38063 |
+
{
|
38064 |
+
"epoch": 0.9144731309393659,
|
38065 |
+
"grad_norm": NaN,
|
38066 |
+
"learning_rate": 1.8522446530158778e-06,
|
38067 |
+
"loss": 0.0,
|
38068 |
+
"step": 5437
|
38069 |
+
},
|
38070 |
+
{
|
38071 |
+
"epoch": 0.9146413253721302,
|
38072 |
+
"grad_norm": NaN,
|
38073 |
+
"learning_rate": 1.8450046671664555e-06,
|
38074 |
+
"loss": 0.0,
|
38075 |
+
"step": 5438
|
38076 |
+
},
|
38077 |
+
{
|
38078 |
+
"epoch": 0.9148095198048944,
|
38079 |
+
"grad_norm": NaN,
|
38080 |
+
"learning_rate": 1.8377785927558232e-06,
|
38081 |
+
"loss": 0.0,
|
38082 |
+
"step": 5439
|
38083 |
+
},
|
38084 |
+
{
|
38085 |
+
"epoch": 0.9149777142376587,
|
38086 |
+
"grad_norm": NaN,
|
38087 |
+
"learning_rate": 1.8305664318715054e-06,
|
38088 |
+
"loss": 0.0,
|
38089 |
+
"step": 5440
|
38090 |
+
},
|
38091 |
+
{
|
38092 |
+
"epoch": 0.915145908670423,
|
38093 |
+
"grad_norm": NaN,
|
38094 |
+
"learning_rate": 1.8233681865970077e-06,
|
38095 |
+
"loss": 0.0,
|
38096 |
+
"step": 5441
|
38097 |
+
},
|
38098 |
+
{
|
38099 |
+
"epoch": 0.9153141031031873,
|
38100 |
+
"grad_norm": NaN,
|
38101 |
+
"learning_rate": 1.8161838590118384e-06,
|
38102 |
+
"loss": 0.0,
|
38103 |
+
"step": 5442
|
38104 |
+
},
|
38105 |
+
{
|
38106 |
+
"epoch": 0.9154822975359516,
|
38107 |
+
"grad_norm": NaN,
|
38108 |
+
"learning_rate": 1.8090134511914658e-06,
|
38109 |
+
"loss": 0.0,
|
38110 |
+
"step": 5443
|
38111 |
+
},
|
38112 |
+
{
|
38113 |
+
"epoch": 0.9156504919687158,
|
38114 |
+
"grad_norm": NaN,
|
38115 |
+
"learning_rate": 1.8018569652073381e-06,
|
38116 |
+
"loss": 0.0,
|
38117 |
+
"step": 5444
|
38118 |
+
},
|
38119 |
+
{
|
38120 |
+
"epoch": 0.9158186864014801,
|
38121 |
+
"grad_norm": NaN,
|
38122 |
+
"learning_rate": 1.7947144031268737e-06,
|
38123 |
+
"loss": 0.0,
|
38124 |
+
"step": 5445
|
38125 |
+
},
|
38126 |
+
{
|
38127 |
+
"epoch": 0.9159868808342444,
|
38128 |
+
"grad_norm": NaN,
|
38129 |
+
"learning_rate": 1.7875857670134943e-06,
|
38130 |
+
"loss": 0.0,
|
38131 |
+
"step": 5446
|
38132 |
+
},
|
38133 |
+
{
|
38134 |
+
"epoch": 0.9161550752670087,
|
38135 |
+
"grad_norm": NaN,
|
38136 |
+
"learning_rate": 1.7804710589265805e-06,
|
38137 |
+
"loss": 0.0,
|
38138 |
+
"step": 5447
|
38139 |
+
},
|
38140 |
+
{
|
38141 |
+
"epoch": 0.916323269699773,
|
38142 |
+
"grad_norm": NaN,
|
38143 |
+
"learning_rate": 1.7733702809214825e-06,
|
38144 |
+
"loss": 0.0,
|
38145 |
+
"step": 5448
|
38146 |
+
},
|
38147 |
+
{
|
38148 |
+
"epoch": 0.9164914641325372,
|
38149 |
+
"grad_norm": NaN,
|
38150 |
+
"learning_rate": 1.7662834350495428e-06,
|
38151 |
+
"loss": 0.0,
|
38152 |
+
"step": 5449
|
38153 |
+
},
|
38154 |
+
{
|
38155 |
+
"epoch": 0.9166596585653015,
|
38156 |
+
"grad_norm": NaN,
|
38157 |
+
"learning_rate": 1.7592105233580736e-06,
|
38158 |
+
"loss": 0.0,
|
38159 |
+
"step": 5450
|
38160 |
+
},
|
38161 |
+
{
|
38162 |
+
"epoch": 0.9168278529980658,
|
38163 |
+
"grad_norm": NaN,
|
38164 |
+
"learning_rate": 1.7521515478903517e-06,
|
38165 |
+
"loss": 0.0,
|
38166 |
+
"step": 5451
|
38167 |
+
},
|
38168 |
+
{
|
38169 |
+
"epoch": 0.9169960474308301,
|
38170 |
+
"grad_norm": NaN,
|
38171 |
+
"learning_rate": 1.7451065106856458e-06,
|
38172 |
+
"loss": 0.0,
|
38173 |
+
"step": 5452
|
38174 |
+
},
|
38175 |
+
{
|
38176 |
+
"epoch": 0.9171642418635944,
|
38177 |
+
"grad_norm": NaN,
|
38178 |
+
"learning_rate": 1.7380754137791778e-06,
|
38179 |
+
"loss": 0.0,
|
38180 |
+
"step": 5453
|
38181 |
+
},
|
38182 |
+
{
|
38183 |
+
"epoch": 0.9173324362963586,
|
38184 |
+
"grad_norm": NaN,
|
38185 |
+
"learning_rate": 1.7310582592021562e-06,
|
38186 |
+
"loss": 0.0,
|
38187 |
+
"step": 5454
|
38188 |
+
},
|
38189 |
+
{
|
38190 |
+
"epoch": 0.9175006307291229,
|
38191 |
+
"grad_norm": NaN,
|
38192 |
+
"learning_rate": 1.7240550489817653e-06,
|
38193 |
+
"loss": 0.0,
|
38194 |
+
"step": 5455
|
38195 |
+
},
|
38196 |
+
{
|
38197 |
+
"epoch": 0.9176688251618872,
|
38198 |
+
"grad_norm": NaN,
|
38199 |
+
"learning_rate": 1.7170657851411476e-06,
|
38200 |
+
"loss": 0.0,
|
38201 |
+
"step": 5456
|
38202 |
+
},
|
38203 |
+
{
|
38204 |
+
"epoch": 0.9178370195946515,
|
38205 |
+
"grad_norm": NaN,
|
38206 |
+
"learning_rate": 1.7100904696994269e-06,
|
38207 |
+
"loss": 0.0,
|
38208 |
+
"step": 5457
|
38209 |
+
},
|
38210 |
+
{
|
38211 |
+
"epoch": 0.9180052140274156,
|
38212 |
+
"grad_norm": NaN,
|
38213 |
+
"learning_rate": 1.7031291046716856e-06,
|
38214 |
+
"loss": 0.0,
|
38215 |
+
"step": 5458
|
38216 |
+
},
|
38217 |
+
{
|
38218 |
+
"epoch": 0.9181734084601799,
|
38219 |
+
"grad_norm": NaN,
|
38220 |
+
"learning_rate": 1.6961816920689932e-06,
|
38221 |
+
"loss": 0.0,
|
38222 |
+
"step": 5459
|
38223 |
+
},
|
38224 |
+
{
|
38225 |
+
"epoch": 0.9183416028929442,
|
38226 |
+
"grad_norm": NaN,
|
38227 |
+
"learning_rate": 1.6892482338983828e-06,
|
38228 |
+
"loss": 0.0,
|
38229 |
+
"step": 5460
|
38230 |
+
},
|
38231 |
+
{
|
38232 |
+
"epoch": 0.9185097973257085,
|
38233 |
+
"grad_norm": NaN,
|
38234 |
+
"learning_rate": 1.6823287321628412e-06,
|
38235 |
+
"loss": 0.0,
|
38236 |
+
"step": 5461
|
38237 |
+
},
|
38238 |
+
{
|
38239 |
+
"epoch": 0.9186779917584728,
|
38240 |
+
"grad_norm": NaN,
|
38241 |
+
"learning_rate": 1.6754231888613304e-06,
|
38242 |
+
"loss": 0.0,
|
38243 |
+
"step": 5462
|
38244 |
+
},
|
38245 |
+
{
|
38246 |
+
"epoch": 0.918846186191237,
|
38247 |
+
"grad_norm": NaN,
|
38248 |
+
"learning_rate": 1.6685316059888046e-06,
|
38249 |
+
"loss": 0.0,
|
38250 |
+
"step": 5463
|
38251 |
+
},
|
38252 |
+
{
|
38253 |
+
"epoch": 0.9190143806240013,
|
38254 |
+
"grad_norm": NaN,
|
38255 |
+
"learning_rate": 1.6616539855361547e-06,
|
38256 |
+
"loss": 0.0,
|
38257 |
+
"step": 5464
|
38258 |
+
},
|
38259 |
+
{
|
38260 |
+
"epoch": 0.9191825750567656,
|
38261 |
+
"grad_norm": NaN,
|
38262 |
+
"learning_rate": 1.6547903294902468e-06,
|
38263 |
+
"loss": 0.0,
|
38264 |
+
"step": 5465
|
38265 |
+
},
|
38266 |
+
{
|
38267 |
+
"epoch": 0.9193507694895299,
|
38268 |
+
"grad_norm": NaN,
|
38269 |
+
"learning_rate": 1.647940639833917e-06,
|
38270 |
+
"loss": 0.0,
|
38271 |
+
"step": 5466
|
38272 |
+
},
|
38273 |
+
{
|
38274 |
+
"epoch": 0.9195189639222942,
|
38275 |
+
"grad_norm": NaN,
|
38276 |
+
"learning_rate": 1.6411049185459605e-06,
|
38277 |
+
"loss": 0.0,
|
38278 |
+
"step": 5467
|
38279 |
+
},
|
38280 |
+
{
|
38281 |
+
"epoch": 0.9196871583550584,
|
38282 |
+
"grad_norm": NaN,
|
38283 |
+
"learning_rate": 1.6342831676011416e-06,
|
38284 |
+
"loss": 0.0,
|
38285 |
+
"step": 5468
|
38286 |
+
},
|
38287 |
+
{
|
38288 |
+
"epoch": 0.9198553527878227,
|
38289 |
+
"grad_norm": NaN,
|
38290 |
+
"learning_rate": 1.62747538897019e-06,
|
38291 |
+
"loss": 0.0,
|
38292 |
+
"step": 5469
|
38293 |
+
},
|
38294 |
+
{
|
38295 |
+
"epoch": 0.920023547220587,
|
38296 |
+
"grad_norm": NaN,
|
38297 |
+
"learning_rate": 1.6206815846197877e-06,
|
38298 |
+
"loss": 0.0,
|
38299 |
+
"step": 5470
|
38300 |
+
},
|
38301 |
+
{
|
38302 |
+
"epoch": 0.9201917416533513,
|
38303 |
+
"grad_norm": NaN,
|
38304 |
+
"learning_rate": 1.6139017565126035e-06,
|
38305 |
+
"loss": 0.0,
|
38306 |
+
"step": 5471
|
38307 |
+
},
|
38308 |
+
{
|
38309 |
+
"epoch": 0.9203599360861155,
|
38310 |
+
"grad_norm": NaN,
|
38311 |
+
"learning_rate": 1.6071359066072433e-06,
|
38312 |
+
"loss": 0.0,
|
38313 |
+
"step": 5472
|
38314 |
+
},
|
38315 |
+
{
|
38316 |
+
"epoch": 0.9205281305188798,
|
38317 |
+
"grad_norm": NaN,
|
38318 |
+
"learning_rate": 1.6003840368582935e-06,
|
38319 |
+
"loss": 0.0,
|
38320 |
+
"step": 5473
|
38321 |
+
},
|
38322 |
+
{
|
38323 |
+
"epoch": 0.9206963249516441,
|
38324 |
+
"grad_norm": NaN,
|
38325 |
+
"learning_rate": 1.5936461492162823e-06,
|
38326 |
+
"loss": 0.0,
|
38327 |
+
"step": 5474
|
38328 |
+
},
|
38329 |
+
{
|
38330 |
+
"epoch": 0.9208645193844084,
|
38331 |
+
"grad_norm": NaN,
|
38332 |
+
"learning_rate": 1.58692224562772e-06,
|
38333 |
+
"loss": 0.0,
|
38334 |
+
"step": 5475
|
38335 |
+
},
|
38336 |
+
{
|
38337 |
+
"epoch": 0.9210327138171727,
|
38338 |
+
"grad_norm": NaN,
|
38339 |
+
"learning_rate": 1.5802123280350633e-06,
|
38340 |
+
"loss": 0.0,
|
38341 |
+
"step": 5476
|
38342 |
+
},
|
38343 |
+
{
|
38344 |
+
"epoch": 0.921200908249937,
|
38345 |
+
"grad_norm": NaN,
|
38346 |
+
"learning_rate": 1.5735163983767342e-06,
|
38347 |
+
"loss": 0.0,
|
38348 |
+
"step": 5477
|
38349 |
+
},
|
38350 |
+
{
|
38351 |
+
"epoch": 0.9213691026827012,
|
38352 |
+
"grad_norm": NaN,
|
38353 |
+
"learning_rate": 1.566834458587102e-06,
|
38354 |
+
"loss": 0.0,
|
38355 |
+
"step": 5478
|
38356 |
+
},
|
38357 |
+
{
|
38358 |
+
"epoch": 0.9215372971154655,
|
38359 |
+
"grad_norm": NaN,
|
38360 |
+
"learning_rate": 1.5601665105965168e-06,
|
38361 |
+
"loss": 0.0,
|
38362 |
+
"step": 5479
|
38363 |
+
},
|
38364 |
+
{
|
38365 |
+
"epoch": 0.9217054915482298,
|
38366 |
+
"grad_norm": NaN,
|
38367 |
+
"learning_rate": 1.5535125563312713e-06,
|
38368 |
+
"loss": 0.0,
|
38369 |
+
"step": 5480
|
38370 |
+
},
|
38371 |
+
{
|
38372 |
+
"epoch": 0.9218736859809941,
|
38373 |
+
"grad_norm": NaN,
|
38374 |
+
"learning_rate": 1.5468725977136168e-06,
|
38375 |
+
"loss": 0.0,
|
38376 |
+
"step": 5481
|
38377 |
+
},
|
38378 |
+
{
|
38379 |
+
"epoch": 0.9220418804137583,
|
38380 |
+
"grad_norm": NaN,
|
38381 |
+
"learning_rate": 1.5402466366617575e-06,
|
38382 |
+
"loss": 0.0,
|
38383 |
+
"step": 5482
|
38384 |
+
},
|
38385 |
+
{
|
38386 |
+
"epoch": 0.9222100748465226,
|
38387 |
+
"grad_norm": NaN,
|
38388 |
+
"learning_rate": 1.5336346750898678e-06,
|
38389 |
+
"loss": 0.0,
|
38390 |
+
"step": 5483
|
38391 |
+
},
|
38392 |
+
{
|
38393 |
+
"epoch": 0.9223782692792869,
|
38394 |
+
"grad_norm": NaN,
|
38395 |
+
"learning_rate": 1.52703671490807e-06,
|
38396 |
+
"loss": 0.0,
|
38397 |
+
"step": 5484
|
38398 |
+
},
|
38399 |
+
{
|
38400 |
+
"epoch": 0.9225464637120512,
|
38401 |
+
"grad_norm": NaN,
|
38402 |
+
"learning_rate": 1.5204527580224337e-06,
|
38403 |
+
"loss": 0.0,
|
38404 |
+
"step": 5485
|
38405 |
+
},
|
38406 |
+
{
|
38407 |
+
"epoch": 0.9227146581448155,
|
38408 |
+
"grad_norm": NaN,
|
38409 |
+
"learning_rate": 1.513882806334993e-06,
|
38410 |
+
"loss": 0.0,
|
38411 |
+
"step": 5486
|
38412 |
+
},
|
38413 |
+
{
|
38414 |
+
"epoch": 0.9228828525775797,
|
38415 |
+
"grad_norm": NaN,
|
38416 |
+
"learning_rate": 1.5073268617437352e-06,
|
38417 |
+
"loss": 0.0,
|
38418 |
+
"step": 5487
|
38419 |
+
},
|
38420 |
+
{
|
38421 |
+
"epoch": 0.9230510470103439,
|
38422 |
+
"grad_norm": NaN,
|
38423 |
+
"learning_rate": 1.500784926142601e-06,
|
38424 |
+
"loss": 0.0,
|
38425 |
+
"step": 5488
|
38426 |
+
},
|
38427 |
+
{
|
38428 |
+
"epoch": 0.9232192414431082,
|
38429 |
+
"grad_norm": NaN,
|
38430 |
+
"learning_rate": 1.4942570014214785e-06,
|
38431 |
+
"loss": 0.0,
|
38432 |
+
"step": 5489
|
38433 |
+
},
|
38434 |
+
{
|
38435 |
+
"epoch": 0.9233874358758725,
|
38436 |
+
"grad_norm": NaN,
|
38437 |
+
"learning_rate": 1.4877430894662036e-06,
|
38438 |
+
"loss": 0.0,
|
38439 |
+
"step": 5490
|
38440 |
+
},
|
38441 |
+
{
|
38442 |
+
"epoch": 0.9235556303086367,
|
38443 |
+
"grad_norm": NaN,
|
38444 |
+
"learning_rate": 1.481243192158588e-06,
|
38445 |
+
"loss": 0.0,
|
38446 |
+
"step": 5491
|
38447 |
+
},
|
38448 |
+
{
|
38449 |
+
"epoch": 0.923723824741401,
|
38450 |
+
"grad_norm": NaN,
|
38451 |
+
"learning_rate": 1.4747573113763735e-06,
|
38452 |
+
"loss": 0.0,
|
38453 |
+
"step": 5492
|
38454 |
+
},
|
38455 |
+
{
|
38456 |
+
"epoch": 0.9238920191741653,
|
38457 |
+
"grad_norm": NaN,
|
38458 |
+
"learning_rate": 1.4682854489932562e-06,
|
38459 |
+
"loss": 0.0,
|
38460 |
+
"step": 5493
|
38461 |
+
},
|
38462 |
+
{
|
38463 |
+
"epoch": 0.9240602136069296,
|
38464 |
+
"grad_norm": NaN,
|
38465 |
+
"learning_rate": 1.4618276068788849e-06,
|
38466 |
+
"loss": 0.0,
|
38467 |
+
"step": 5494
|
38468 |
+
},
|
38469 |
+
{
|
38470 |
+
"epoch": 0.9242284080396939,
|
38471 |
+
"grad_norm": NaN,
|
38472 |
+
"learning_rate": 1.4553837868988618e-06,
|
38473 |
+
"loss": 0.0,
|
38474 |
+
"step": 5495
|
38475 |
+
},
|
38476 |
+
{
|
38477 |
+
"epoch": 0.9243966024724581,
|
38478 |
+
"grad_norm": NaN,
|
38479 |
+
"learning_rate": 1.4489539909147365e-06,
|
38480 |
+
"loss": 0.0,
|
38481 |
+
"step": 5496
|
38482 |
+
},
|
38483 |
+
{
|
38484 |
+
"epoch": 0.9245647969052224,
|
38485 |
+
"grad_norm": NaN,
|
38486 |
+
"learning_rate": 1.4425382207839954e-06,
|
38487 |
+
"loss": 0.0,
|
38488 |
+
"step": 5497
|
38489 |
}
|
38490 |
],
|
38491 |
"logging_steps": 1,
|
|
|
38505 |
"attributes": {}
|
38506 |
}
|
38507 |
},
|
38508 |
+
"total_flos": 1.0347737895665664e+17,
|
38509 |
"train_batch_size": 8,
|
38510 |
"trial_name": null,
|
38511 |
"trial_params": null
|