Training in progress, step 2629, checkpoint
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:09161988ab35182d0f172df37ed2df3516cbe35067a0869c0d073c78b0f1e3a1
 size 2145944
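This is a Git LFS pointer diff: the repository tracks only the `version` line, an `oid sha256:<digest>` line, and the byte `size`, while the binary payload lives in LFS storage; the same pattern repeats for the three files below. The oid is simply the SHA-256 of the file's contents, so a local copy can be checked against the pointer. A minimal sketch in Python (the local path assumes the LFS objects have been fetched, e.g. with `git lfs pull`):

import hashlib

def lfs_oid(path: str, chunk_size: int = 1 << 20) -> str:
    """SHA-256 of the file's contents -- the digest Git LFS records as the pointer oid."""
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()

# Compare a pulled file against the oid shown in the new pointer above.
print(lfs_oid("last-checkpoint/adapter_model.safetensors") ==
      "09161988ab35182d0f172df37ed2df3516cbe35067a0869c0d073c78b0f1e3a1")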
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:254f73a33601897761011a4368934a7338dc1d9d6a504f80f066e6406bcbb939
 size 4310020
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:05e51ecfacb21da1acc5735448bc8ed70887cf2b3c5d1710aa9335fefd25428b
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:78bb6a385f1d33d78b10d1602eb1b4b8e3b002f2928cfd515cb8f96e488f678a
 size 1064
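Together with trainer_state.json below, these files are what `transformers.Trainer` writes at a save step so training can resume exactly: adapter weights (safetensors), the optimizer and LR-scheduler state dicts, and the RNG state. A hedged sketch for inspecting them locally; it assumes torch and safetensors are installed and the LFS objects are pulled, and reading adapter_model.safetensors as PEFT/LoRA-style adapter weights is an inference from the file name, not something stated in this commit:

import torch
from safetensors.torch import load_file

adapter = load_file("last-checkpoint/adapter_model.safetensors")
# weights_only=False because these state dicts contain non-tensor objects
optimizer = torch.load("last-checkpoint/optimizer.pt", map_location="cpu", weights_only=False)
scheduler = torch.load("last-checkpoint/scheduler.pt", map_location="cpu", weights_only=False)
rng_state = torch.load("last-checkpoint/rng_state.pth", map_location="cpu", weights_only=False)

print(sorted(adapter)[:5])                 # first few adapter tensor names
print(optimizer["param_groups"][0]["lr"])  # optimizer's current learning rate

In the original training script (not part of this commit), resuming would be `trainer.train(resume_from_checkpoint="last-checkpoint")`, which restores all four of these states along with the trainer state below.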
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.
+  "epoch": 0.9068644360124181,
   "eval_steps": 500,
-  "global_step":
+  "global_step": 2629,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -16737,6 +16737,1679 @@
       "learning_rate": 7.940081824211049e-06,
       "loss": 9.4932,
       "step": 2390
+    },
+    { "epoch": 0.824767161090031, "grad_norm": 1.51363205909729, "learning_rate": 7.909762791669972e-06, "loss": 9.4573, "step": 2391 },
+    { "epoch": 0.8251121076233184, "grad_norm": 1.5144532918930054, "learning_rate": 7.879496783469454e-06, "loss": 9.3881, "step": 2392 },
+    { "epoch": 0.8254570541566058, "grad_norm": 1.5119389295578003, "learning_rate": 7.849283837737941e-06, "loss": 9.4617, "step": 2393 },
+    { "epoch": 0.8258020006898931, "grad_norm": 1.6774712800979614, "learning_rate": 7.819123992537042e-06, "loss": 9.4475, "step": 2394 },
+    { "epoch": 0.8261469472231804, "grad_norm": 1.540063500404358, "learning_rate": 7.789017285861439e-06, "loss": 9.4618, "step": 2395 },
+    { "epoch": 0.8264918937564677, "grad_norm": 1.6762762069702148, "learning_rate": 7.758963755638916e-06, "loss": 9.436, "step": 2396 },
+    { "epoch": 0.8268368402897551, "grad_norm": 1.5456700325012207, "learning_rate": 7.728963439730203e-06, "loss": 9.3776, "step": 2397 },
+    { "epoch": 0.8271817868230424, "grad_norm": 1.513611912727356, "learning_rate": 7.699016375929057e-06, "loss": 9.4647, "step": 2398 },
+    { "epoch": 0.8275267333563298, "grad_norm": 1.805281400680542, "learning_rate": 7.669122601962098e-06, "loss": 9.3213, "step": 2399 },
+    { "epoch": 0.8278716798896171, "grad_norm": 1.686733603477478, "learning_rate": 7.639282155488836e-06, "loss": 9.3407, "step": 2400 },
+    { "epoch": 0.8282166264229045, "grad_norm": 0.8331398963928223, "learning_rate": 7.609495074101614e-06, "loss": 9.5749, "step": 2401 },
+    { "epoch": 0.8285615729561918, "grad_norm": 0.9920496940612793, "learning_rate": 7.579761395325536e-06, "loss": 9.5205, "step": 2402 },
+    { "epoch": 0.8289065194894791, "grad_norm": 1.0967357158660889, "learning_rate": 7.550081156618399e-06, "loss": 9.4806, "step": 2403 },
+    { "epoch": 0.8292514660227664, "grad_norm": 1.0771796703338623, "learning_rate": 7.520454395370724e-06, "loss": 9.5243, "step": 2404 },
+    { "epoch": 0.8295964125560538, "grad_norm": 1.102107286453247, "learning_rate": 7.490881148905654e-06, "loss": 9.4947, "step": 2405 },
+    { "epoch": 0.8299413590893412, "grad_norm": 0.9728934168815613, "learning_rate": 7.461361454478871e-06, "loss": 9.5951, "step": 2406 },
+    { "epoch": 0.8302863056226285, "grad_norm": 1.1681954860687256, "learning_rate": 7.43189534927865e-06, "loss": 9.5276, "step": 2407 },
+    { "epoch": 0.8306312521559158, "grad_norm": 1.1862436532974243, "learning_rate": 7.402482870425725e-06, "loss": 9.524, "step": 2408 },
+    { "epoch": 0.8309761986892031, "grad_norm": 1.2328039407730103, "learning_rate": 7.373124054973296e-06, "loss": 9.5313, "step": 2409 },
+    { "epoch": 0.8313211452224906, "grad_norm": 1.20083749294281, "learning_rate": 7.343818939906916e-06, "loss": 9.5427, "step": 2410 },
+    { "epoch": 0.8316660917557779, "grad_norm": 1.2234563827514648, "learning_rate": 7.314567562144542e-06, "loss": 9.52, "step": 2411 },
+    { "epoch": 0.8320110382890652, "grad_norm": 1.3493642807006836, "learning_rate": 7.285369958536375e-06, "loss": 9.4352, "step": 2412 },
+    { "epoch": 0.8323559848223525, "grad_norm": 1.0981404781341553, "learning_rate": 7.2562261658649325e-06, "loss": 9.5559, "step": 2413 },
+    { "epoch": 0.8327009313556398, "grad_norm": 1.130433201789856, "learning_rate": 7.227136220844883e-06, "loss": 9.5719, "step": 2414 },
+    { "epoch": 0.8330458778889273, "grad_norm": 1.3468449115753174, "learning_rate": 7.1981001601231276e-06, "loss": 9.4805, "step": 2415 },
+    { "epoch": 0.8333908244222146, "grad_norm": 1.3730062246322632, "learning_rate": 7.169118020278615e-06, "loss": 9.4373, "step": 2416 },
+    { "epoch": 0.8337357709555019, "grad_norm": 1.30331289768219, "learning_rate": 7.140189837822409e-06, "loss": 9.4876, "step": 2417 },
+    { "epoch": 0.8340807174887892, "grad_norm": 1.2246065139770508, "learning_rate": 7.111315649197603e-06, "loss": 9.523, "step": 2418 },
+    { "epoch": 0.8344256640220766, "grad_norm": 1.301967740058899, "learning_rate": 7.082495490779229e-06, "loss": 9.4755, "step": 2419 },
+    { "epoch": 0.834770610555364, "grad_norm": 1.2529209852218628, "learning_rate": 7.053729398874298e-06, "loss": 9.5024, "step": 2420 },
+    { "epoch": 0.8351155570886513, "grad_norm": 1.276577115058899, "learning_rate": 7.025017409721657e-06, "loss": 9.6069, "step": 2421 },
+    { "epoch": 0.8354605036219386, "grad_norm": 1.323262095451355, "learning_rate": 6.996359559492083e-06, "loss": 9.4286, "step": 2422 },
+    { "epoch": 0.8358054501552259, "grad_norm": 1.3153644800186157, "learning_rate": 6.967755884288046e-06, "loss": 9.5094, "step": 2423 },
+    { "epoch": 0.8361503966885133, "grad_norm": 1.23281991481781, "learning_rate": 6.939206420143857e-06, "loss": 9.4647, "step": 2424 },
+    { "epoch": 0.8364953432218006, "grad_norm": 1.269282341003418, "learning_rate": 6.910711203025455e-06, "loss": 9.5021, "step": 2425 },
+    { "epoch": 0.836840289755088, "grad_norm": 1.2297919988632202, "learning_rate": 6.882270268830498e-06, "loss": 9.4929, "step": 2426 },
+    { "epoch": 0.8371852362883753, "grad_norm": 1.2677528858184814, "learning_rate": 6.853883653388249e-06, "loss": 9.4232, "step": 2427 },
+    { "epoch": 0.8375301828216627, "grad_norm": 1.2528631687164307, "learning_rate": 6.825551392459511e-06, "loss": 9.4827, "step": 2428 },
+    { "epoch": 0.83787512935495, "grad_norm": 1.3022857904434204, "learning_rate": 6.797273521736641e-06, "loss": 9.4533, "step": 2429 },
+    { "epoch": 0.8382200758882373, "grad_norm": 1.4493368864059448, "learning_rate": 6.769050076843469e-06, "loss": 9.4251, "step": 2430 },
+    { "epoch": 0.8385650224215246, "grad_norm": 1.4479137659072876, "learning_rate": 6.740881093335278e-06, "loss": 9.4296, "step": 2431 },
+    { "epoch": 0.838909968954812, "grad_norm": 1.3461737632751465, "learning_rate": 6.712766606698689e-06, "loss": 9.5194, "step": 2432 },
+    { "epoch": 0.8392549154880994, "grad_norm": 1.3478329181671143, "learning_rate": 6.684706652351741e-06, "loss": 9.4739, "step": 2433 },
+    { "epoch": 0.8395998620213867, "grad_norm": 1.5345542430877686, "learning_rate": 6.656701265643711e-06, "loss": 9.4211, "step": 2434 },
+    { "epoch": 0.839944808554674, "grad_norm": 1.2493342161178589, "learning_rate": 6.628750481855167e-06, "loss": 9.4707, "step": 2435 },
+    { "epoch": 0.8402897550879613, "grad_norm": 1.3178075551986694, "learning_rate": 6.6008543361978814e-06, "loss": 9.4852, "step": 2436 },
+    { "epoch": 0.8406347016212488, "grad_norm": 1.4194328784942627, "learning_rate": 6.5730128638148095e-06, "loss": 9.4392, "step": 2437 },
+    { "epoch": 0.8409796481545361, "grad_norm": 1.4278353452682495, "learning_rate": 6.545226099779994e-06, "loss": 9.4369, "step": 2438 },
+    { "epoch": 0.8413245946878234, "grad_norm": 1.4180926084518433, "learning_rate": 6.517494079098585e-06, "loss": 9.4777, "step": 2439 },
+    { "epoch": 0.8416695412211107, "grad_norm": 1.3950679302215576, "learning_rate": 6.489816836706786e-06, "loss": 9.4472, "step": 2440 },
+    { "epoch": 0.842014487754398, "grad_norm": 1.5117658376693726, "learning_rate": 6.462194407471733e-06, "loss": 9.4362, "step": 2441 },
+    { "epoch": 0.8423594342876854, "grad_norm": 1.452319622039795, "learning_rate": 6.43462682619157e-06, "loss": 9.443, "step": 2442 },
+    { "epoch": 0.8427043808209728, "grad_norm": 1.3729323148727417, "learning_rate": 6.407114127595304e-06, "loss": 9.4496, "step": 2443 },
+    { "epoch": 0.8430493273542601, "grad_norm": 1.5410650968551636, "learning_rate": 6.379656346342844e-06, "loss": 9.4024, "step": 2444 },
+    { "epoch": 0.8433942738875474, "grad_norm": 1.4286586046218872, "learning_rate": 6.352253517024859e-06, "loss": 9.4946, "step": 2445 },
+    { "epoch": 0.8437392204208348, "grad_norm": 1.544579267501831, "learning_rate": 6.324905674162846e-06, "loss": 9.3807, "step": 2446 },
+    { "epoch": 0.8440841669541221, "grad_norm": 1.5438258647918701, "learning_rate": 6.297612852208978e-06, "loss": 9.4389, "step": 2447 },
+    { "epoch": 0.8444291134874095, "grad_norm": 1.5225231647491455, "learning_rate": 6.2703750855461654e-06, "loss": 9.3885, "step": 2448 },
+    { "epoch": 0.8447740600206968, "grad_norm": 1.6549159288406372, "learning_rate": 6.24319240848793e-06, "loss": 9.4022, "step": 2449 },
+    { "epoch": 0.8451190065539841, "grad_norm": 1.7702971696853638, "learning_rate": 6.216064855278414e-06, "loss": 9.3768, "step": 2450 },
+    { "epoch": 0.8454639530872715, "grad_norm": 0.8622440695762634, "learning_rate": 6.188992460092286e-06, "loss": 9.5452, "step": 2451 },
+    { "epoch": 0.8458088996205588, "grad_norm": 1.0240706205368042, "learning_rate": 6.161975257034741e-06, "loss": 9.5128, "step": 2452 },
+    { "epoch": 0.8461538461538461, "grad_norm": 1.021040678024292, "learning_rate": 6.135013280141477e-06, "loss": 9.5117, "step": 2453 },
+    { "epoch": 0.8464987926871335, "grad_norm": 1.160355806350708, "learning_rate": 6.108106563378557e-06, "loss": 9.5123, "step": 2454 },
+    { "epoch": 0.8468437392204209, "grad_norm": 1.145014762878418, "learning_rate": 6.081255140642483e-06, "loss": 9.4774, "step": 2455 },
+    { "epoch": 0.8471886857537082, "grad_norm": 1.0997885465621948, "learning_rate": 6.054459045760053e-06, "loss": 9.531, "step": 2456 },
+    { "epoch": 0.8475336322869955, "grad_norm": 1.057318925857544, "learning_rate": 6.027718312488423e-06, "loss": 9.5878, "step": 2457 },
+    { "epoch": 0.8478785788202828, "grad_norm": 1.2089539766311646, "learning_rate": 6.001032974514947e-06, "loss": 9.4941, "step": 2458 },
+    { "epoch": 0.8482235253535702, "grad_norm": 1.2328418493270874, "learning_rate": 5.974403065457235e-06, "loss": 9.4992, "step": 2459 },
+    { "epoch": 0.8485684718868576, "grad_norm": 1.1309876441955566, "learning_rate": 5.947828618863027e-06, "loss": 9.544, "step": 2460 },
+    { "epoch": 0.8489134184201449, "grad_norm": 1.2640806436538696, "learning_rate": 5.921309668210234e-06, "loss": 9.4976, "step": 2461 },
+    { "epoch": 0.8492583649534322, "grad_norm": 1.1987804174423218, "learning_rate": 5.894846246906843e-06, "loss": 9.5373, "step": 2462 },
+    { "epoch": 0.8496033114867195, "grad_norm": 1.2217447757720947, "learning_rate": 5.868438388290854e-06, "loss": 9.4895, "step": 2463 },
+    { "epoch": 0.849948258020007, "grad_norm": 1.1472078561782837, "learning_rate": 5.8420861256303415e-06, "loss": 9.5564, "step": 2464 },
+    { "epoch": 0.8502932045532943, "grad_norm": 1.3079042434692383, "learning_rate": 5.815789492123258e-06, "loss": 9.4928, "step": 2465 },
+    { "epoch": 0.8506381510865816, "grad_norm": 1.388771891593933, "learning_rate": 5.7895485208975365e-06, "loss": 9.5433, "step": 2466 },
+    { "epoch": 0.8509830976198689, "grad_norm": 1.3080216646194458, "learning_rate": 5.76336324501095e-06, "loss": 9.4568, "step": 2467 },
+    { "epoch": 0.8513280441531562, "grad_norm": 1.1682881116867065, "learning_rate": 5.737233697451145e-06, "loss": 9.5277, "step": 2468 },
+    { "epoch": 0.8516729906864436, "grad_norm": 1.3328065872192383, "learning_rate": 5.7111599111355215e-06, "loss": 9.4417, "step": 2469 },
+    { "epoch": 0.852017937219731, "grad_norm": 1.3096672296524048, "learning_rate": 5.685141918911257e-06, "loss": 9.4382, "step": 2470 },
+    { "epoch": 0.8523628837530183, "grad_norm": 1.3736581802368164, "learning_rate": 5.659179753555244e-06, "loss": 9.4432, "step": 2471 },
+    { "epoch": 0.8527078302863056, "grad_norm": 1.2530180215835571, "learning_rate": 5.633273447774046e-06, "loss": 9.4777, "step": 2472 },
+    { "epoch": 0.853052776819593, "grad_norm": 1.279069423675537, "learning_rate": 5.607423034203829e-06, "loss": 9.538, "step": 2473 },
+    { "epoch": 0.8533977233528803, "grad_norm": 1.3018614053726196, "learning_rate": 5.581628545410372e-06, "loss": 9.4599, "step": 2474 },
+    { "epoch": 0.8537426698861676, "grad_norm": 1.3087834119796753, "learning_rate": 5.555890013889009e-06, "loss": 9.503, "step": 2475 },
+    { "epoch": 0.854087616419455, "grad_norm": 1.349788784980774, "learning_rate": 5.530207472064552e-06, "loss": 9.5043, "step": 2476 },
+    { "epoch": 0.8544325629527423, "grad_norm": 1.2353582382202148, "learning_rate": 5.504580952291294e-06, "loss": 9.5006, "step": 2477 },
+    { "epoch": 0.8547775094860297, "grad_norm": 1.3644788265228271, "learning_rate": 5.479010486852959e-06, "loss": 9.4729, "step": 2478 },
+    { "epoch": 0.855122456019317, "grad_norm": 1.2602345943450928, "learning_rate": 5.453496107962658e-06, "loss": 9.5259, "step": 2479 },
+    { "epoch": 0.8554674025526043, "grad_norm": 1.4060320854187012, "learning_rate": 5.428037847762813e-06, "loss": 9.5535, "step": 2480 },
+    { "epoch": 0.8558123490858917, "grad_norm": 1.5353844165802002, "learning_rate": 5.40263573832519e-06, "loss": 9.4855, "step": 2481 },
+    { "epoch": 0.8561572956191791, "grad_norm": 1.502706527709961, "learning_rate": 5.377289811650782e-06, "loss": 9.3855, "step": 2482 },
+    { "epoch": 0.8565022421524664, "grad_norm": 1.2772150039672852, "learning_rate": 5.35200009966983e-06, "loss": 9.5104, "step": 2483 },
+    { "epoch": 0.8568471886857537, "grad_norm": 1.388257622718811, "learning_rate": 5.326766634241748e-06, "loss": 9.4841, "step": 2484 },
+    { "epoch": 0.857192135219041, "grad_norm": 1.3631497621536255, "learning_rate": 5.301589447155092e-06, "loss": 9.4448, "step": 2485 },
+    { "epoch": 0.8575370817523283, "grad_norm": 1.4563467502593994, "learning_rate": 5.27646857012753e-06, "loss": 9.4469, "step": 2486 },
+    { "epoch": 0.8578820282856158, "grad_norm": 1.3632886409759521, "learning_rate": 5.251404034805768e-06, "loss": 9.4865, "step": 2487 },
+    { "epoch": 0.8582269748189031, "grad_norm": 1.3538519144058228, "learning_rate": 5.226395872765555e-06, "loss": 9.4439, "step": 2488 },
+    { "epoch": 0.8585719213521904, "grad_norm": 1.3481531143188477, "learning_rate": 5.201444115511605e-06, "loss": 9.52, "step": 2489 },
+    { "epoch": 0.8589168678854777, "grad_norm": 1.370755910873413, "learning_rate": 5.176548794477598e-06, "loss": 9.4518, "step": 2490 },
+    { "epoch": 0.8592618144187651, "grad_norm": 1.4279519319534302, "learning_rate": 5.151709941026078e-06, "loss": 9.4622, "step": 2491 },
+    { "epoch": 0.8596067609520525, "grad_norm": 1.441394329071045, "learning_rate": 5.126927586448516e-06, "loss": 9.4812, "step": 2492 },
+    { "epoch": 0.8599517074853398, "grad_norm": 1.4180115461349487, "learning_rate": 5.1022017619651415e-06, "loss": 9.4861, "step": 2493 },
+    { "epoch": 0.8602966540186271, "grad_norm": 1.5045257806777954, "learning_rate": 5.077532498725013e-06, "loss": 9.4093, "step": 2494 },
+    { "epoch": 0.8606416005519144, "grad_norm": 1.4450771808624268, "learning_rate": 5.052919827805891e-06, "loss": 9.4047, "step": 2495 },
+    { "epoch": 0.8609865470852018, "grad_norm": 1.6171228885650635, "learning_rate": 5.02836378021429e-06, "loss": 9.3855, "step": 2496 },
+    { "epoch": 0.8613314936184892, "grad_norm": 1.4856090545654297, "learning_rate": 5.003864386885376e-06, "loss": 9.3877, "step": 2497 },
+    { "epoch": 0.8616764401517765, "grad_norm": 1.6896843910217285, "learning_rate": 4.979421678682905e-06, "loss": 9.4596, "step": 2498 },
+    { "epoch": 0.8620213866850638, "grad_norm": 1.7836569547653198, "learning_rate": 4.95503568639929e-06, "loss": 9.325, "step": 2499 },
+    { "epoch": 0.8623663332183512, "grad_norm": 1.8124544620513916, "learning_rate": 4.930706440755445e-06, "loss": 9.3828, "step": 2500 },
+    { "epoch": 0.8627112797516385, "grad_norm": 0.9383218884468079, "learning_rate": 4.9064339724008144e-06, "loss": 9.5376, "step": 2501 },
+    { "epoch": 0.8630562262849258, "grad_norm": 1.0528539419174194, "learning_rate": 4.8822183119133e-06, "loss": 9.5191, "step": 2502 },
+    { "epoch": 0.8634011728182132, "grad_norm": 1.0615488290786743, "learning_rate": 4.858059489799266e-06, "loss": 9.5424, "step": 2503 },
+    { "epoch": 0.8637461193515005, "grad_norm": 1.0702091455459595, "learning_rate": 4.833957536493439e-06, "loss": 9.5417, "step": 2504 },
+    { "epoch": 0.8640910658847879, "grad_norm": 1.1862672567367554, "learning_rate": 4.809912482358936e-06, "loss": 9.5302, "step": 2505 },
+    { "epoch": 0.8644360124180752, "grad_norm": 1.08174729347229, "learning_rate": 4.785924357687166e-06, "loss": 9.5423, "step": 2506 },
+    { "epoch": 0.8647809589513625, "grad_norm": 1.0673178434371948, "learning_rate": 4.761993192697844e-06, "loss": 9.5344, "step": 2507 },
+    { "epoch": 0.8651259054846498, "grad_norm": 1.2792860269546509, "learning_rate": 4.7381190175389275e-06, "loss": 9.5388, "step": 2508 },
+    { "epoch": 0.8654708520179372, "grad_norm": 1.148232340812683, "learning_rate": 4.714301862286541e-06, "loss": 9.46, "step": 2509 },
+    { "epoch": 0.8658157985512246, "grad_norm": 1.2341820001602173, "learning_rate": 4.6905417569450275e-06, "loss": 9.5178, "step": 2510 },
+    { "epoch": 0.8661607450845119, "grad_norm": 1.114518642425537, "learning_rate": 4.666838731446821e-06, "loss": 9.5216, "step": 2511 },
+    { "epoch": 0.8665056916177992, "grad_norm": 1.1133232116699219, "learning_rate": 4.643192815652469e-06, "loss": 9.49, "step": 2512 },
+    { "epoch": 0.8668506381510865, "grad_norm": 1.1808667182922363, "learning_rate": 4.619604039350572e-06, "loss": 9.5987, "step": 2513 },
+    { "epoch": 0.867195584684374, "grad_norm": 1.2320611476898193, "learning_rate": 4.596072432257748e-06, "loss": 9.4902, "step": 2514 },
+    { "epoch": 0.8675405312176613, "grad_norm": 1.1610668897628784, "learning_rate": 4.572598024018571e-06, "loss": 9.5405, "step": 2515 },
+    { "epoch": 0.8678854777509486, "grad_norm": 1.1383475065231323, "learning_rate": 4.549180844205603e-06, "loss": 9.5299, "step": 2516 },
+    { "epoch": 0.8682304242842359, "grad_norm": 1.1785327196121216, "learning_rate": 4.525820922319257e-06, "loss": 9.5377, "step": 2517 },
+    { "epoch": 0.8685753708175232, "grad_norm": 1.3649024963378906, "learning_rate": 4.502518287787855e-06, "loss": 9.4778, "step": 2518 },
+    { "epoch": 0.8689203173508107, "grad_norm": 1.3480148315429688, "learning_rate": 4.4792729699675294e-06, "loss": 9.4929, "step": 2519 },
+    { "epoch": 0.869265263884098, "grad_norm": 1.2999801635742188, "learning_rate": 4.456084998142224e-06, "loss": 9.5344, "step": 2520 },
+    { "epoch": 0.8696102104173853, "grad_norm": 1.2308170795440674, "learning_rate": 4.43295440152362e-06, "loss": 9.478, "step": 2521 },
+    { "epoch": 0.8699551569506726, "grad_norm": 1.3496099710464478, "learning_rate": 4.409881209251121e-06, "loss": 9.49, "step": 2522 },
+    { "epoch": 0.87030010348396, "grad_norm": 1.218358039855957, "learning_rate": 4.386865450391836e-06, "loss": 9.5208, "step": 2523 },
+    { "epoch": 0.8706450500172473, "grad_norm": 1.460325002670288, "learning_rate": 4.3639071539404775e-06, "loss": 9.4209, "step": 2524 },
+    { "epoch": 0.8709899965505347, "grad_norm": 1.4746004343032837, "learning_rate": 4.341006348819421e-06, "loss": 9.5211, "step": 2525 },
+    { "epoch": 0.871334943083822, "grad_norm": 1.2816509008407593, "learning_rate": 4.318163063878561e-06, "loss": 9.4756, "step": 2526 },
+    { "epoch": 0.8716798896171093, "grad_norm": 1.3259309530258179, "learning_rate": 4.295377327895389e-06, "loss": 9.5018, "step": 2527 },
+    { "epoch": 0.8720248361503967, "grad_norm": 1.4273511171340942, "learning_rate": 4.272649169574849e-06, "loss": 9.4362, "step": 2528 },
+    { "epoch": 0.872369782683684, "grad_norm": 1.3017966747283936, "learning_rate": 4.249978617549361e-06, "loss": 9.4516, "step": 2529 },
+    { "epoch": 0.8727147292169714, "grad_norm": 1.2315176725387573, "learning_rate": 4.227365700378799e-06, "loss": 9.5207, "step": 2530 },
+    { "epoch": 0.8730596757502587, "grad_norm": 1.395295262336731, "learning_rate": 4.204810446550394e-06, "loss": 9.4892, "step": 2531 },
+    { "epoch": 0.8734046222835461, "grad_norm": 1.3410911560058594, "learning_rate": 4.182312884478767e-06, "loss": 9.5657, "step": 2532 },
+    { "epoch": 0.8737495688168334, "grad_norm": 1.4816246032714844, "learning_rate": 4.159873042505813e-06, "loss": 9.4206, "step": 2533 },
+    { "epoch": 0.8740945153501207, "grad_norm": 1.3325238227844238, "learning_rate": 4.137490948900785e-06, "loss": 9.484, "step": 2534 },
+    { "epoch": 0.874439461883408, "grad_norm": 1.395811676979065, "learning_rate": 4.115166631860113e-06, "loss": 9.5019, "step": 2535 },
+    { "epoch": 0.8747844084166954, "grad_norm": 1.3411831855773926, "learning_rate": 4.092900119507498e-06, "loss": 9.5089, "step": 2536 },
+    { "epoch": 0.8751293549499828, "grad_norm": 1.315796971321106, "learning_rate": 4.07069143989377e-06, "loss": 9.471, "step": 2537 },
+    { "epoch": 0.8754743014832701, "grad_norm": 1.3734862804412842, "learning_rate": 4.048540620996932e-06, "loss": 9.545, "step": 2538 },
+    { "epoch": 0.8758192480165574, "grad_norm": 1.5111862421035767, "learning_rate": 4.026447690722113e-06, "loss": 9.402, "step": 2539 },
+    { "epoch": 0.8761641945498447, "grad_norm": 1.3808101415634155, "learning_rate": 4.00441267690147e-06, "loss": 9.4543, "step": 2540 },
+    { "epoch": 0.8765091410831322, "grad_norm": 1.574131727218628, "learning_rate": 3.982435607294227e-06, "loss": 9.436, "step": 2541 },
+    { "epoch": 0.8768540876164195, "grad_norm": 1.2923246622085571, "learning_rate": 3.9605165095866034e-06, "loss": 9.5278, "step": 2542 },
+    { "epoch": 0.8771990341497068, "grad_norm": 1.4172004461288452, "learning_rate": 3.938655411391806e-06, "loss": 9.4752, "step": 2543 },
+    { "epoch": 0.8775439806829941, "grad_norm": 1.5436758995056152, "learning_rate": 3.916852340249932e-06, "loss": 9.4072, "step": 2544 },
+    { "epoch": 0.8778889272162814, "grad_norm": 1.5451159477233887, "learning_rate": 3.895107323628022e-06, "loss": 9.4346, "step": 2545 },
+    { "epoch": 0.8782338737495688, "grad_norm": 1.6603964567184448, "learning_rate": 3.873420388919951e-06, "loss": 9.3515, "step": 2546 },
+    { "epoch": 0.8785788202828562, "grad_norm": 1.6220159530639648, "learning_rate": 3.851791563446444e-06, "loss": 9.354, "step": 2547 },
+    { "epoch": 0.8789237668161435, "grad_norm": 1.769572138786316, "learning_rate": 3.8302208744549985e-06, "loss": 9.3154, "step": 2548 },
+    { "epoch": 0.8792687133494308, "grad_norm": 1.58255136013031, "learning_rate": 3.8087083491199104e-06, "loss": 9.4663, "step": 2549 },
+    { "epoch": 0.8796136598827182, "grad_norm": 1.7701119184494019, "learning_rate": 3.7872540145421574e-06, "loss": 9.3808, "step": 2550 },
+    { "epoch": 0.8799586064160055, "grad_norm": 0.9108375310897827, "learning_rate": 3.765857897749431e-06, "loss": 9.5752, "step": 2551 },
+    { "epoch": 0.8803035529492929, "grad_norm": 1.0239676237106323, "learning_rate": 3.7445200256961023e-06, "loss": 9.6122, "step": 2552 },
+    { "epoch": 0.8806484994825802, "grad_norm": 1.0186281204223633, "learning_rate": 3.723240425263119e-06, "loss": 9.5595, "step": 2553 },
+    { "epoch": 0.8809934460158675, "grad_norm": 1.052709698677063, "learning_rate": 3.7020191232580603e-06, "loss": 9.5816, "step": 2554 },
+    { "epoch": 0.8813383925491549, "grad_norm": 1.0933276414871216, "learning_rate": 3.680856146415046e-06, "loss": 9.538, "step": 2555 },
+    { "epoch": 0.8816833390824422, "grad_norm": 1.128589153289795, "learning_rate": 3.659751521394733e-06, "loss": 9.5611, "step": 2556 },
+    { "epoch": 0.8820282856157295, "grad_norm": 1.0991203784942627, "learning_rate": 3.6387052747842376e-06, "loss": 9.601, "step": 2557 },
+    { "epoch": 0.8823732321490169, "grad_norm": 1.1087894439697266, "learning_rate": 3.6177174330971675e-06, "loss": 9.4994, "step": 2558 },
+    { "epoch": 0.8827181786823043, "grad_norm": 1.183724045753479, "learning_rate": 3.5967880227735172e-06, "loss": 9.553, "step": 2559 },
+    { "epoch": 0.8830631252155916, "grad_norm": 1.090376615524292, "learning_rate": 3.5759170701797017e-06, "loss": 9.5626, "step": 2560 },
+    { "epoch": 0.8834080717488789, "grad_norm": 1.3279502391815186, "learning_rate": 3.555104601608483e-06, "loss": 9.5579, "step": 2561 },
+    { "epoch": 0.8837530182821662, "grad_norm": 1.1904933452606201, "learning_rate": 3.5343506432789498e-06, "loss": 9.5454, "step": 2562 },
+    { "epoch": 0.8840979648154536, "grad_norm": 1.2588059902191162, "learning_rate": 3.5136552213364593e-06, "loss": 9.5167, "step": 2563 },
+    { "epoch": 0.884442911348741, "grad_norm": 1.151564598083496, "learning_rate": 3.493018361852651e-06, "loss": 9.5044, "step": 2564 },
+    { "epoch": 0.8847878578820283, "grad_norm": 1.164340853691101, "learning_rate": 3.4724400908253853e-06, "loss": 9.5034, "step": 2565 },
+    { "epoch": 0.8851328044153156, "grad_norm": 1.254442572593689, "learning_rate": 3.4519204341786902e-06, "loss": 9.558, "step": 2566 },
+    { "epoch": 0.8854777509486029, "grad_norm": 1.2330467700958252, "learning_rate": 3.4314594177627944e-06, "loss": 9.4793, "step": 2567 },
+    { "epoch": 0.8858226974818904, "grad_norm": 1.302596092224121, "learning_rate": 3.4110570673539955e-06, "loss": 9.5275, "step": 2568 },
+    { "epoch": 0.8861676440151777, "grad_norm": 1.3285911083221436, "learning_rate": 3.390713408654761e-06, "loss": 9.5662, "step": 2569 },
+    { "epoch": 0.886512590548465, "grad_norm": 1.2535372972488403, "learning_rate": 3.370428467293546e-06, "loss": 9.5002, "step": 2570 },
+    { "epoch": 0.8868575370817523, "grad_norm": 1.2314890623092651, "learning_rate": 3.3502022688248867e-06, "loss": 9.5106, "step": 2571 },
+    { "epoch": 0.8872024836150396, "grad_norm": 1.347776174545288, "learning_rate": 3.330034838729279e-06, "loss": 9.4378, "step": 2572 },
+    { "epoch": 0.887547430148327, "grad_norm": 1.3018256425857544, "learning_rate": 3.309926202413205e-06, "loss": 9.5894, "step": 2573 },
+    { "epoch": 0.8878923766816144, "grad_norm": 1.2346081733703613, "learning_rate": 3.2898763852090895e-06, "loss": 9.498, "step": 2574 },
+    { "epoch": 0.8882373232149017, "grad_norm": 1.3945900201797485, "learning_rate": 3.269885412375223e-06, "loss": 9.46, "step": 2575 },
+    { "epoch": 0.888582269748189, "grad_norm": 1.4205291271209717, "learning_rate": 3.249953309095799e-06, "loss": 9.5027, "step": 2576 },
+    { "epoch": 0.8889272162814764, "grad_norm": 1.2638293504714966, "learning_rate": 3.2300801004808314e-06, "loss": 9.491, "step": 2577 },
+    { "epoch": 0.8892721628147637, "grad_norm": 1.215364694595337, "learning_rate": 3.21026581156616e-06, "loss": 9.5241, "step": 2578 },
+    { "epoch": 0.889617109348051, "grad_norm": 1.4178768396377563, "learning_rate": 3.1905104673133625e-06, "loss": 9.5186, "step": 2579 },
+    { "epoch": 0.8899620558813384, "grad_norm": 1.3347620964050293, "learning_rate": 3.170814092609792e-06, "loss": 9.4986, "step": 2580 },
+    { "epoch": 0.8903070024146257, "grad_norm": 1.4867733716964722, "learning_rate": 3.151176712268489e-06, "loss": 9.497, "step": 2581 },
+    { "epoch": 0.8906519489479131, "grad_norm": 1.451385259628296, "learning_rate": 3.1315983510281976e-06, "loss": 9.4539, "step": 2582 },
+    { "epoch": 0.8909968954812004, "grad_norm": 1.2720712423324585, "learning_rate": 3.1120790335533e-06, "loss": 9.5447, "step": 2583 },
+    { "epoch": 0.8913418420144877, "grad_norm": 1.3472886085510254, "learning_rate": 3.0926187844337984e-06, "loss": 9.4754, "step": 2584 },
+    { "epoch": 0.891686788547775, "grad_norm": 1.3972887992858887, "learning_rate": 3.0732176281852652e-06, "loss": 9.4664, "step": 2585 },
+    { "epoch": 0.8920317350810625, "grad_norm": 1.3255772590637207, "learning_rate": 3.053875589248861e-06, "loss": 9.5324, "step": 2586 },
+    { "epoch": 0.8923766816143498, "grad_norm": 1.3736553192138672, "learning_rate": 3.0345926919912505e-06, "loss": 9.4713, "step": 2587 },
+    { "epoch": 0.8927216281476371, "grad_norm": 1.5358455181121826, "learning_rate": 3.0153689607045845e-06, "loss": 9.3817, "step": 2588 },
+    { "epoch": 0.8930665746809244, "grad_norm": 1.3966903686523438, "learning_rate": 2.996204419606502e-06, "loss": 9.4691, "step": 2589 },
+    { "epoch": 0.8934115212142117, "grad_norm": 1.3497977256774902, "learning_rate": 2.9770990928400576e-06, "loss": 9.5051, "step": 2590 },
+    { "epoch": 0.8937564677474992, "grad_norm": 1.4420788288116455, "learning_rate": 2.9580530044737263e-06, "loss": 9.5041, "step": 2591 },
+    { "epoch": 0.8941014142807865, "grad_norm": 1.465009331703186, "learning_rate": 2.939066178501332e-06, "loss": 9.4508, "step": 2592 },
+    { "epoch": 0.8944463608140738, "grad_norm": 1.221989631652832, "learning_rate": 2.920138638842068e-06, "loss": 9.469, "step": 2593 },
+    { "epoch": 0.8947913073473611, "grad_norm": 1.483028769493103, "learning_rate": 2.9012704093404062e-06, "loss": 9.4044, "step": 2594 },
+    { "epoch": 0.8951362538806485, "grad_norm": 1.5430452823638916, "learning_rate": 2.882461513766133e-06, "loss": 9.4083, "step": 2595 },
+    { "epoch": 0.8954812004139359, "grad_norm": 1.4477607011795044, "learning_rate": 2.8637119758142707e-06, "loss": 9.4623, "step": 2596 },
+    { "epoch": 0.8958261469472232, "grad_norm": 1.4406673908233643, "learning_rate": 2.8450218191050705e-06, "loss": 9.4275, "step": 2597 },
+    { "epoch": 0.8961710934805105, "grad_norm": 1.5870102643966675, "learning_rate": 2.8263910671839866e-06, "loss": 9.4317, "step": 2598 },
+    { "epoch": 0.8965160400137978, "grad_norm": 1.490122675895691, "learning_rate": 2.8078197435216e-06, "loss": 9.4802, "step": 2599 },
+    { "epoch": 0.8968609865470852, "grad_norm": 1.7018402814865112, "learning_rate": 2.7893078715136687e-06, "loss": 9.3888, "step": 2600 },
+    { "epoch": 0.8972059330803726, "grad_norm": 0.8538911938667297, "learning_rate": 2.770855474481021e-06, "loss": 9.6266, "step": 2601 },
+    { "epoch": 0.8975508796136599, "grad_norm": 1.0329569578170776, "learning_rate": 2.7524625756695954e-06, "loss": 9.5183, "step": 2602 },
+    { "epoch": 0.8978958261469472, "grad_norm": 1.030372142791748, "learning_rate": 2.734129198250318e-06, "loss": 9.5589, "step": 2603 },
+    { "epoch": 0.8982407726802346, "grad_norm": 1.0216647386550903, "learning_rate": 2.7158553653192144e-06, "loss": 9.5562, "step": 2604 },
+    { "epoch": 0.8985857192135219, "grad_norm": 1.0627166032791138, "learning_rate": 2.6976410998972136e-06, "loss": 9.5075, "step": 2605 },
+    { "epoch": 0.8989306657468092, "grad_norm": 1.1064709424972534, "learning_rate": 2.6794864249302664e-06, "loss": 9.5467, "step": 2606 },
+    { "epoch": 0.8992756122800966, "grad_norm": 1.0749237537384033, "learning_rate": 2.6613913632892064e-06, "loss": 9.5038, "step": 2607 },
+    { "epoch": 0.8996205588133839, "grad_norm": 1.1983023881912231, "learning_rate": 2.6433559377697925e-06, "loss": 9.4699, "step": 2608 },
+    { "epoch": 0.8999655053466713, "grad_norm": 1.2673431634902954, "learning_rate": 2.625380171092667e-06, "loss": 9.482, "step": 2609 },
+    { "epoch": 0.9003104518799586, "grad_norm": 1.168282389640808, "learning_rate": 2.6074640859032718e-06, "loss": 9.5551, "step": 2610 },
+    { "epoch": 0.9006553984132459, "grad_norm": 1.100938320159912, "learning_rate": 2.5896077047719237e-06, "loss": 9.6096, "step": 2611 },
+    { "epoch": 0.9010003449465332, "grad_norm": 1.2417744398117065, "learning_rate": 2.5718110501936675e-06, "loss": 9.5024, "step": 2612 },
+    { "epoch": 0.9013452914798207, "grad_norm": 1.1761174201965332, "learning_rate": 2.554074144588342e-06, "loss": 9.5431, "step": 2613 },
+    { "epoch": 0.901690238013108, "grad_norm": 1.2780194282531738, "learning_rate": 2.5363970103004955e-06, "loss": 9.5324, "step": 2614 },
+    { "epoch": 0.9020351845463953, "grad_norm": 1.1857703924179077, "learning_rate": 2.5187796695994026e-06, "loss": 9.4997, "step": 2615 },
+    { "epoch": 0.9023801310796826, "grad_norm": 1.2250052690505981, "learning_rate": 2.5012221446789775e-06, "loss": 9.4995, "step": 2616 },
+    { "epoch": 0.9027250776129699, "grad_norm": 1.1668322086334229, "learning_rate": 2.4837244576578047e-06, "loss": 9.4859, "step": 2617 },
+    { "epoch": 0.9030700241462574, "grad_norm": 1.3255215883255005, "learning_rate": 2.4662866305790842e-06, "loss": 9.4968, "step": 2618 },
+    { "epoch": 0.9034149706795447, "grad_norm": 1.371646761894226, "learning_rate": 2.4489086854105946e-06, "loss": 9.5009, "step": 2619 },
+    { "epoch": 0.903759917212832, "grad_norm": 1.182887315750122, "learning_rate": 2.4315906440446955e-06, "loss": 9.5169, "step": 2620 },
+    { "epoch": 0.9041048637461193, "grad_norm": 1.215737223625183, "learning_rate": 2.414332528298252e-06, "loss": 9.5355, "step": 2621 },
+    { "epoch": 0.9044498102794067, "grad_norm": 1.2013062238693237, "learning_rate": 2.397134359912667e-06, "loss": 9.4352, "step": 2622 },
+    { "epoch": 0.9047947568126941, "grad_norm": 1.2379329204559326, "learning_rate": 2.3799961605537937e-06, "loss": 9.4499, "step": 2623 },
+    { "epoch": 0.9051397033459814, "grad_norm": 1.1866499185562134, "learning_rate": 2.3629179518119606e-06, "loss": 9.5595, "step": 2624 },
+    { "epoch": 0.9054846498792687, "grad_norm": 1.3945666551589966, "learning_rate": 2.345899755201919e-06, "loss": 9.4705, "step": 2625 },
+    { "epoch": 0.905829596412556, "grad_norm": 1.3572181463241577, "learning_rate": 2.328941592162809e-06, "loss": 9.4464, "step": 2626 },
+    { "epoch": 0.9061745429458434, "grad_norm": 1.3635412454605103, "learning_rate": 2.3120434840581406e-06, "loss": 9.5151, "step": 2627 },
+    { "epoch": 0.9065194894791307, "grad_norm": 1.2851320505142212, "learning_rate": 2.2952054521757804e-06, "loss": 9.5445, "step": 2628 },
+    { "epoch": 0.9068644360124181, "grad_norm": 1.4047993421554565, "learning_rate": 2.2784275177278934e-06, "loss": 9.3997, "step": 2629
     }
   ],
   "logging_steps": 1,
@@ -16756,7 +18429,7 @@
       "attributes": {}
     }
   },
-  "total_flos":
+  "total_flos": 545736876883968.0,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null
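The appended log_history entries are plain JSON, so the curve they trace is easy to recover. Note also that "epoch" is just global_step divided by the optimizer steps per epoch: 2629 / 0.9068644360124181 ≈ 2899, which matches the per-step epoch increment of ≈ 0.000345 between consecutive entries above. A small sketch, assuming the checkpoint directory is available locally:

import json

with open("last-checkpoint/trainer_state.json") as f:
    state = json.load(f)

# Keep only training-loss entries (eval entries, if any, lack "loss").
logs = [e for e in state["log_history"] if "loss" in e]
print(logs[-1]["step"], logs[-1]["loss"])   # 2629 9.3997

# "epoch" = global_step / steps_per_epoch for this run:
steps_per_epoch = round(state["global_step"] / state["epoch"])
print(steps_per_epoch)                      # 2899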