Training in progress, step 53887, checkpoint
Browse files
last-checkpoint/model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 306619286
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:011159f6c7dc96dd5ac1366209b10dab05e097df1b102941e45c82da68b59f2f
|
3 |
size 306619286
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 919972410
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:779761eab32018c8446b727954ef35d7120f4a3ef7c4759e47144a985a28812e
|
3 |
size 919972410
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1000
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:25d300bc86beb0072df8e8773ec27f6e09afa7db83f06d03ffc9b42be6152925
|
3 |
size 1000
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 5000,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -35087,6 +35087,2722 @@
|
|
35087 |
"eval_samples_per_second": 3188.771,
|
35088 |
"eval_steps_per_second": 49.826,
|
35089 |
"step": 50000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
35090 |
}
|
35091 |
],
|
35092 |
"logging_steps": 10,
|
@@ -35101,12 +37817,12 @@
|
|
35101 |
"should_evaluate": false,
|
35102 |
"should_log": false,
|
35103 |
"should_save": true,
|
35104 |
-
"should_training_stop":
|
35105 |
},
|
35106 |
"attributes": {}
|
35107 |
}
|
35108 |
},
|
35109 |
-
"total_flos":
|
35110 |
"train_batch_size": 8,
|
35111 |
"trial_name": null,
|
35112 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.999997680336444,
|
5 |
"eval_steps": 5000,
|
6 |
+
"global_step": 53887,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
35087 |
"eval_samples_per_second": 3188.771,
|
35088 |
"eval_steps_per_second": 49.826,
|
35089 |
"step": 50000
|
35090 |
+
},
|
35091 |
+
{
|
35092 |
+
"epoch": 0.928050995483615,
|
35093 |
+
"grad_norm": 36.09375,
|
35094 |
+
"learning_rate": 9.854992074001885e-06,
|
35095 |
+
"loss": 18.631,
|
35096 |
+
"step": 50010
|
35097 |
+
},
|
35098 |
+
{
|
35099 |
+
"epoch": 0.9282365685680949,
|
35100 |
+
"grad_norm": 38.5,
|
35101 |
+
"learning_rate": 9.854963078215844e-06,
|
35102 |
+
"loss": 18.7004,
|
35103 |
+
"step": 50020
|
35104 |
+
},
|
35105 |
+
{
|
35106 |
+
"epoch": 0.9284221416525748,
|
35107 |
+
"grad_norm": 37.15625,
|
35108 |
+
"learning_rate": 9.854934082429802e-06,
|
35109 |
+
"loss": 18.9797,
|
35110 |
+
"step": 50030
|
35111 |
+
},
|
35112 |
+
{
|
35113 |
+
"epoch": 0.9286077147370545,
|
35114 |
+
"grad_norm": 37.1875,
|
35115 |
+
"learning_rate": 9.854905086643759e-06,
|
35116 |
+
"loss": 18.6544,
|
35117 |
+
"step": 50040
|
35118 |
+
},
|
35119 |
+
{
|
35120 |
+
"epoch": 0.9287932878215344,
|
35121 |
+
"grad_norm": 36.0625,
|
35122 |
+
"learning_rate": 9.854876090857716e-06,
|
35123 |
+
"loss": 18.7909,
|
35124 |
+
"step": 50050
|
35125 |
+
},
|
35126 |
+
{
|
35127 |
+
"epoch": 0.9289788609060142,
|
35128 |
+
"grad_norm": 38.3125,
|
35129 |
+
"learning_rate": 9.854847095071675e-06,
|
35130 |
+
"loss": 19.1664,
|
35131 |
+
"step": 50060
|
35132 |
+
},
|
35133 |
+
{
|
35134 |
+
"epoch": 0.929164433990494,
|
35135 |
+
"grad_norm": 37.15625,
|
35136 |
+
"learning_rate": 9.854818099285631e-06,
|
35137 |
+
"loss": 18.7882,
|
35138 |
+
"step": 50070
|
35139 |
+
},
|
35140 |
+
{
|
35141 |
+
"epoch": 0.9293500070749738,
|
35142 |
+
"grad_norm": 34.5625,
|
35143 |
+
"learning_rate": 9.854789103499589e-06,
|
35144 |
+
"loss": 18.8722,
|
35145 |
+
"step": 50080
|
35146 |
+
},
|
35147 |
+
{
|
35148 |
+
"epoch": 0.9295355801594537,
|
35149 |
+
"grad_norm": 39.28125,
|
35150 |
+
"learning_rate": 9.854760107713548e-06,
|
35151 |
+
"loss": 18.4605,
|
35152 |
+
"step": 50090
|
35153 |
+
},
|
35154 |
+
{
|
35155 |
+
"epoch": 0.9297211532439335,
|
35156 |
+
"grad_norm": 36.0625,
|
35157 |
+
"learning_rate": 9.854731111927505e-06,
|
35158 |
+
"loss": 18.957,
|
35159 |
+
"step": 50100
|
35160 |
+
},
|
35161 |
+
{
|
35162 |
+
"epoch": 0.9299067263284133,
|
35163 |
+
"grad_norm": 36.9375,
|
35164 |
+
"learning_rate": 9.854702116141462e-06,
|
35165 |
+
"loss": 18.9987,
|
35166 |
+
"step": 50110
|
35167 |
+
},
|
35168 |
+
{
|
35169 |
+
"epoch": 0.9300922994128932,
|
35170 |
+
"grad_norm": 37.09375,
|
35171 |
+
"learning_rate": 9.85467312035542e-06,
|
35172 |
+
"loss": 18.3386,
|
35173 |
+
"step": 50120
|
35174 |
+
},
|
35175 |
+
{
|
35176 |
+
"epoch": 0.9302778724973729,
|
35177 |
+
"grad_norm": 36.78125,
|
35178 |
+
"learning_rate": 9.854644124569377e-06,
|
35179 |
+
"loss": 18.7949,
|
35180 |
+
"step": 50130
|
35181 |
+
},
|
35182 |
+
{
|
35183 |
+
"epoch": 0.9304634455818528,
|
35184 |
+
"grad_norm": 35.59375,
|
35185 |
+
"learning_rate": 9.854615128783335e-06,
|
35186 |
+
"loss": 18.5927,
|
35187 |
+
"step": 50140
|
35188 |
+
},
|
35189 |
+
{
|
35190 |
+
"epoch": 0.9306490186663327,
|
35191 |
+
"grad_norm": 35.78125,
|
35192 |
+
"learning_rate": 9.854586132997292e-06,
|
35193 |
+
"loss": 18.7013,
|
35194 |
+
"step": 50150
|
35195 |
+
},
|
35196 |
+
{
|
35197 |
+
"epoch": 0.9308345917508125,
|
35198 |
+
"grad_norm": 37.1875,
|
35199 |
+
"learning_rate": 9.85455713721125e-06,
|
35200 |
+
"loss": 18.6819,
|
35201 |
+
"step": 50160
|
35202 |
+
},
|
35203 |
+
{
|
35204 |
+
"epoch": 0.9310201648352923,
|
35205 |
+
"grad_norm": 36.3125,
|
35206 |
+
"learning_rate": 9.854528141425207e-06,
|
35207 |
+
"loss": 19.1635,
|
35208 |
+
"step": 50170
|
35209 |
+
},
|
35210 |
+
{
|
35211 |
+
"epoch": 0.9312057379197721,
|
35212 |
+
"grad_norm": 34.40625,
|
35213 |
+
"learning_rate": 9.854499145639164e-06,
|
35214 |
+
"loss": 18.2726,
|
35215 |
+
"step": 50180
|
35216 |
+
},
|
35217 |
+
{
|
35218 |
+
"epoch": 0.931391311004252,
|
35219 |
+
"grad_norm": 37.28125,
|
35220 |
+
"learning_rate": 9.854470149853122e-06,
|
35221 |
+
"loss": 18.9638,
|
35222 |
+
"step": 50190
|
35223 |
+
},
|
35224 |
+
{
|
35225 |
+
"epoch": 0.9315768840887317,
|
35226 |
+
"grad_norm": 35.28125,
|
35227 |
+
"learning_rate": 9.85444115406708e-06,
|
35228 |
+
"loss": 19.0953,
|
35229 |
+
"step": 50200
|
35230 |
+
},
|
35231 |
+
{
|
35232 |
+
"epoch": 0.9317624571732116,
|
35233 |
+
"grad_norm": 38.5,
|
35234 |
+
"learning_rate": 9.854412158281038e-06,
|
35235 |
+
"loss": 18.7024,
|
35236 |
+
"step": 50210
|
35237 |
+
},
|
35238 |
+
{
|
35239 |
+
"epoch": 0.9319480302576915,
|
35240 |
+
"grad_norm": 37.0,
|
35241 |
+
"learning_rate": 9.854383162494996e-06,
|
35242 |
+
"loss": 18.825,
|
35243 |
+
"step": 50220
|
35244 |
+
},
|
35245 |
+
{
|
35246 |
+
"epoch": 0.9321336033421712,
|
35247 |
+
"grad_norm": 36.59375,
|
35248 |
+
"learning_rate": 9.854354166708953e-06,
|
35249 |
+
"loss": 19.0501,
|
35250 |
+
"step": 50230
|
35251 |
+
},
|
35252 |
+
{
|
35253 |
+
"epoch": 0.9323191764266511,
|
35254 |
+
"grad_norm": 35.90625,
|
35255 |
+
"learning_rate": 9.85432517092291e-06,
|
35256 |
+
"loss": 18.4288,
|
35257 |
+
"step": 50240
|
35258 |
+
},
|
35259 |
+
{
|
35260 |
+
"epoch": 0.9325047495111309,
|
35261 |
+
"grad_norm": 35.90625,
|
35262 |
+
"learning_rate": 9.854296175136868e-06,
|
35263 |
+
"loss": 19.0825,
|
35264 |
+
"step": 50250
|
35265 |
+
},
|
35266 |
+
{
|
35267 |
+
"epoch": 0.9326903225956107,
|
35268 |
+
"grad_norm": 38.90625,
|
35269 |
+
"learning_rate": 9.854267179350825e-06,
|
35270 |
+
"loss": 19.0764,
|
35271 |
+
"step": 50260
|
35272 |
+
},
|
35273 |
+
{
|
35274 |
+
"epoch": 0.9328758956800905,
|
35275 |
+
"grad_norm": 37.75,
|
35276 |
+
"learning_rate": 9.854238183564784e-06,
|
35277 |
+
"loss": 18.8452,
|
35278 |
+
"step": 50270
|
35279 |
+
},
|
35280 |
+
{
|
35281 |
+
"epoch": 0.9330614687645704,
|
35282 |
+
"grad_norm": 34.59375,
|
35283 |
+
"learning_rate": 9.85420918777874e-06,
|
35284 |
+
"loss": 18.7928,
|
35285 |
+
"step": 50280
|
35286 |
+
},
|
35287 |
+
{
|
35288 |
+
"epoch": 0.9332470418490502,
|
35289 |
+
"grad_norm": 33.78125,
|
35290 |
+
"learning_rate": 9.854180191992697e-06,
|
35291 |
+
"loss": 18.1697,
|
35292 |
+
"step": 50290
|
35293 |
+
},
|
35294 |
+
{
|
35295 |
+
"epoch": 0.93343261493353,
|
35296 |
+
"grad_norm": 37.875,
|
35297 |
+
"learning_rate": 9.854151196206657e-06,
|
35298 |
+
"loss": 18.7602,
|
35299 |
+
"step": 50300
|
35300 |
+
},
|
35301 |
+
{
|
35302 |
+
"epoch": 0.9336181880180099,
|
35303 |
+
"grad_norm": 35.84375,
|
35304 |
+
"learning_rate": 9.854122200420614e-06,
|
35305 |
+
"loss": 18.7859,
|
35306 |
+
"step": 50310
|
35307 |
+
},
|
35308 |
+
{
|
35309 |
+
"epoch": 0.9338037611024897,
|
35310 |
+
"grad_norm": 37.0625,
|
35311 |
+
"learning_rate": 9.854093204634571e-06,
|
35312 |
+
"loss": 18.5895,
|
35313 |
+
"step": 50320
|
35314 |
+
},
|
35315 |
+
{
|
35316 |
+
"epoch": 0.9339893341869695,
|
35317 |
+
"grad_norm": 34.59375,
|
35318 |
+
"learning_rate": 9.854064208848529e-06,
|
35319 |
+
"loss": 18.5902,
|
35320 |
+
"step": 50330
|
35321 |
+
},
|
35322 |
+
{
|
35323 |
+
"epoch": 0.9341749072714494,
|
35324 |
+
"grad_norm": 37.28125,
|
35325 |
+
"learning_rate": 9.854035213062486e-06,
|
35326 |
+
"loss": 19.0408,
|
35327 |
+
"step": 50340
|
35328 |
+
},
|
35329 |
+
{
|
35330 |
+
"epoch": 0.9343604803559292,
|
35331 |
+
"grad_norm": 34.125,
|
35332 |
+
"learning_rate": 9.854006217276444e-06,
|
35333 |
+
"loss": 18.3564,
|
35334 |
+
"step": 50350
|
35335 |
+
},
|
35336 |
+
{
|
35337 |
+
"epoch": 0.934546053440409,
|
35338 |
+
"grad_norm": 38.4375,
|
35339 |
+
"learning_rate": 9.853977221490401e-06,
|
35340 |
+
"loss": 18.8423,
|
35341 |
+
"step": 50360
|
35342 |
+
},
|
35343 |
+
{
|
35344 |
+
"epoch": 0.9347316265248888,
|
35345 |
+
"grad_norm": 35.8125,
|
35346 |
+
"learning_rate": 9.85394822570436e-06,
|
35347 |
+
"loss": 18.3884,
|
35348 |
+
"step": 50370
|
35349 |
+
},
|
35350 |
+
{
|
35351 |
+
"epoch": 0.9349171996093687,
|
35352 |
+
"grad_norm": 35.4375,
|
35353 |
+
"learning_rate": 9.853919229918318e-06,
|
35354 |
+
"loss": 19.1228,
|
35355 |
+
"step": 50380
|
35356 |
+
},
|
35357 |
+
{
|
35358 |
+
"epoch": 0.9351027726938484,
|
35359 |
+
"grad_norm": 38.21875,
|
35360 |
+
"learning_rate": 9.853890234132273e-06,
|
35361 |
+
"loss": 18.5342,
|
35362 |
+
"step": 50390
|
35363 |
+
},
|
35364 |
+
{
|
35365 |
+
"epoch": 0.9352883457783283,
|
35366 |
+
"grad_norm": 35.125,
|
35367 |
+
"learning_rate": 9.853861238346232e-06,
|
35368 |
+
"loss": 19.0061,
|
35369 |
+
"step": 50400
|
35370 |
+
},
|
35371 |
+
{
|
35372 |
+
"epoch": 0.9354739188628082,
|
35373 |
+
"grad_norm": 36.53125,
|
35374 |
+
"learning_rate": 9.85383224256019e-06,
|
35375 |
+
"loss": 18.631,
|
35376 |
+
"step": 50410
|
35377 |
+
},
|
35378 |
+
{
|
35379 |
+
"epoch": 0.9356594919472879,
|
35380 |
+
"grad_norm": 37.0625,
|
35381 |
+
"learning_rate": 9.853803246774147e-06,
|
35382 |
+
"loss": 18.9214,
|
35383 |
+
"step": 50420
|
35384 |
+
},
|
35385 |
+
{
|
35386 |
+
"epoch": 0.9358450650317678,
|
35387 |
+
"grad_norm": 38.3125,
|
35388 |
+
"learning_rate": 9.853774250988105e-06,
|
35389 |
+
"loss": 18.8773,
|
35390 |
+
"step": 50430
|
35391 |
+
},
|
35392 |
+
{
|
35393 |
+
"epoch": 0.9360306381162476,
|
35394 |
+
"grad_norm": 38.125,
|
35395 |
+
"learning_rate": 9.853745255202062e-06,
|
35396 |
+
"loss": 19.1509,
|
35397 |
+
"step": 50440
|
35398 |
+
},
|
35399 |
+
{
|
35400 |
+
"epoch": 0.9362162112007274,
|
35401 |
+
"grad_norm": 35.28125,
|
35402 |
+
"learning_rate": 9.85371625941602e-06,
|
35403 |
+
"loss": 18.7762,
|
35404 |
+
"step": 50450
|
35405 |
+
},
|
35406 |
+
{
|
35407 |
+
"epoch": 0.9364017842852073,
|
35408 |
+
"grad_norm": 37.09375,
|
35409 |
+
"learning_rate": 9.853687263629977e-06,
|
35410 |
+
"loss": 18.6366,
|
35411 |
+
"step": 50460
|
35412 |
+
},
|
35413 |
+
{
|
35414 |
+
"epoch": 0.9365873573696871,
|
35415 |
+
"grad_norm": 37.71875,
|
35416 |
+
"learning_rate": 9.853658267843936e-06,
|
35417 |
+
"loss": 18.7425,
|
35418 |
+
"step": 50470
|
35419 |
+
},
|
35420 |
+
{
|
35421 |
+
"epoch": 0.9367729304541669,
|
35422 |
+
"grad_norm": 36.28125,
|
35423 |
+
"learning_rate": 9.853629272057893e-06,
|
35424 |
+
"loss": 18.5633,
|
35425 |
+
"step": 50480
|
35426 |
+
},
|
35427 |
+
{
|
35428 |
+
"epoch": 0.9369585035386467,
|
35429 |
+
"grad_norm": 37.65625,
|
35430 |
+
"learning_rate": 9.853600276271849e-06,
|
35431 |
+
"loss": 18.5449,
|
35432 |
+
"step": 50490
|
35433 |
+
},
|
35434 |
+
{
|
35435 |
+
"epoch": 0.9371440766231266,
|
35436 |
+
"grad_norm": 36.34375,
|
35437 |
+
"learning_rate": 9.853571280485808e-06,
|
35438 |
+
"loss": 19.0544,
|
35439 |
+
"step": 50500
|
35440 |
+
},
|
35441 |
+
{
|
35442 |
+
"epoch": 0.9373296497076065,
|
35443 |
+
"grad_norm": 37.71875,
|
35444 |
+
"learning_rate": 9.853542284699766e-06,
|
35445 |
+
"loss": 18.8355,
|
35446 |
+
"step": 50510
|
35447 |
+
},
|
35448 |
+
{
|
35449 |
+
"epoch": 0.9375152227920862,
|
35450 |
+
"grad_norm": 37.03125,
|
35451 |
+
"learning_rate": 9.853513288913723e-06,
|
35452 |
+
"loss": 18.7822,
|
35453 |
+
"step": 50520
|
35454 |
+
},
|
35455 |
+
{
|
35456 |
+
"epoch": 0.9377007958765661,
|
35457 |
+
"grad_norm": 35.71875,
|
35458 |
+
"learning_rate": 9.85348429312768e-06,
|
35459 |
+
"loss": 18.5719,
|
35460 |
+
"step": 50530
|
35461 |
+
},
|
35462 |
+
{
|
35463 |
+
"epoch": 0.9378863689610459,
|
35464 |
+
"grad_norm": 35.84375,
|
35465 |
+
"learning_rate": 9.85345529734164e-06,
|
35466 |
+
"loss": 18.8113,
|
35467 |
+
"step": 50540
|
35468 |
+
},
|
35469 |
+
{
|
35470 |
+
"epoch": 0.9380719420455257,
|
35471 |
+
"grad_norm": 37.28125,
|
35472 |
+
"learning_rate": 9.853426301555595e-06,
|
35473 |
+
"loss": 18.984,
|
35474 |
+
"step": 50550
|
35475 |
+
},
|
35476 |
+
{
|
35477 |
+
"epoch": 0.9382575151300055,
|
35478 |
+
"grad_norm": 35.28125,
|
35479 |
+
"learning_rate": 9.853397305769553e-06,
|
35480 |
+
"loss": 18.7287,
|
35481 |
+
"step": 50560
|
35482 |
+
},
|
35483 |
+
{
|
35484 |
+
"epoch": 0.9384430882144854,
|
35485 |
+
"grad_norm": 35.96875,
|
35486 |
+
"learning_rate": 9.853368309983512e-06,
|
35487 |
+
"loss": 18.6256,
|
35488 |
+
"step": 50570
|
35489 |
+
},
|
35490 |
+
{
|
35491 |
+
"epoch": 0.9386286612989652,
|
35492 |
+
"grad_norm": 35.96875,
|
35493 |
+
"learning_rate": 9.853339314197469e-06,
|
35494 |
+
"loss": 18.8356,
|
35495 |
+
"step": 50580
|
35496 |
+
},
|
35497 |
+
{
|
35498 |
+
"epoch": 0.938814234383445,
|
35499 |
+
"grad_norm": 37.75,
|
35500 |
+
"learning_rate": 9.853310318411426e-06,
|
35501 |
+
"loss": 18.5802,
|
35502 |
+
"step": 50590
|
35503 |
+
},
|
35504 |
+
{
|
35505 |
+
"epoch": 0.9389998074679249,
|
35506 |
+
"grad_norm": 35.84375,
|
35507 |
+
"learning_rate": 9.853281322625384e-06,
|
35508 |
+
"loss": 18.9444,
|
35509 |
+
"step": 50600
|
35510 |
+
},
|
35511 |
+
{
|
35512 |
+
"epoch": 0.9391853805524046,
|
35513 |
+
"grad_norm": 37.4375,
|
35514 |
+
"learning_rate": 9.853252326839341e-06,
|
35515 |
+
"loss": 18.9265,
|
35516 |
+
"step": 50610
|
35517 |
+
},
|
35518 |
+
{
|
35519 |
+
"epoch": 0.9393709536368845,
|
35520 |
+
"grad_norm": 35.625,
|
35521 |
+
"learning_rate": 9.853223331053299e-06,
|
35522 |
+
"loss": 19.3268,
|
35523 |
+
"step": 50620
|
35524 |
+
},
|
35525 |
+
{
|
35526 |
+
"epoch": 0.9395565267213644,
|
35527 |
+
"grad_norm": 35.25,
|
35528 |
+
"learning_rate": 9.853194335267256e-06,
|
35529 |
+
"loss": 18.3115,
|
35530 |
+
"step": 50630
|
35531 |
+
},
|
35532 |
+
{
|
35533 |
+
"epoch": 0.9397420998058441,
|
35534 |
+
"grad_norm": 37.53125,
|
35535 |
+
"learning_rate": 9.853165339481214e-06,
|
35536 |
+
"loss": 19.0,
|
35537 |
+
"step": 50640
|
35538 |
+
},
|
35539 |
+
{
|
35540 |
+
"epoch": 0.939927672890324,
|
35541 |
+
"grad_norm": 36.4375,
|
35542 |
+
"learning_rate": 9.853136343695173e-06,
|
35543 |
+
"loss": 18.8586,
|
35544 |
+
"step": 50650
|
35545 |
+
},
|
35546 |
+
{
|
35547 |
+
"epoch": 0.9401132459748038,
|
35548 |
+
"grad_norm": 37.53125,
|
35549 |
+
"learning_rate": 9.853107347909128e-06,
|
35550 |
+
"loss": 18.5812,
|
35551 |
+
"step": 50660
|
35552 |
+
},
|
35553 |
+
{
|
35554 |
+
"epoch": 0.9402988190592837,
|
35555 |
+
"grad_norm": 37.25,
|
35556 |
+
"learning_rate": 9.853078352123087e-06,
|
35557 |
+
"loss": 18.7525,
|
35558 |
+
"step": 50670
|
35559 |
+
},
|
35560 |
+
{
|
35561 |
+
"epoch": 0.9404843921437634,
|
35562 |
+
"grad_norm": 37.4375,
|
35563 |
+
"learning_rate": 9.853049356337045e-06,
|
35564 |
+
"loss": 18.8099,
|
35565 |
+
"step": 50680
|
35566 |
+
},
|
35567 |
+
{
|
35568 |
+
"epoch": 0.9406699652282433,
|
35569 |
+
"grad_norm": 37.875,
|
35570 |
+
"learning_rate": 9.853020360551002e-06,
|
35571 |
+
"loss": 18.8271,
|
35572 |
+
"step": 50690
|
35573 |
+
},
|
35574 |
+
{
|
35575 |
+
"epoch": 0.9408555383127232,
|
35576 |
+
"grad_norm": 36.59375,
|
35577 |
+
"learning_rate": 9.85299136476496e-06,
|
35578 |
+
"loss": 18.7296,
|
35579 |
+
"step": 50700
|
35580 |
+
},
|
35581 |
+
{
|
35582 |
+
"epoch": 0.9410411113972029,
|
35583 |
+
"grad_norm": 36.75,
|
35584 |
+
"learning_rate": 9.852962368978917e-06,
|
35585 |
+
"loss": 18.7892,
|
35586 |
+
"step": 50710
|
35587 |
+
},
|
35588 |
+
{
|
35589 |
+
"epoch": 0.9412266844816828,
|
35590 |
+
"grad_norm": 35.6875,
|
35591 |
+
"learning_rate": 9.852933373192874e-06,
|
35592 |
+
"loss": 18.672,
|
35593 |
+
"step": 50720
|
35594 |
+
},
|
35595 |
+
{
|
35596 |
+
"epoch": 0.9414122575661626,
|
35597 |
+
"grad_norm": 37.28125,
|
35598 |
+
"learning_rate": 9.852904377406832e-06,
|
35599 |
+
"loss": 18.4865,
|
35600 |
+
"step": 50730
|
35601 |
+
},
|
35602 |
+
{
|
35603 |
+
"epoch": 0.9415978306506424,
|
35604 |
+
"grad_norm": 36.59375,
|
35605 |
+
"learning_rate": 9.85287538162079e-06,
|
35606 |
+
"loss": 18.6127,
|
35607 |
+
"step": 50740
|
35608 |
+
},
|
35609 |
+
{
|
35610 |
+
"epoch": 0.9417834037351223,
|
35611 |
+
"grad_norm": 35.0625,
|
35612 |
+
"learning_rate": 9.852846385834748e-06,
|
35613 |
+
"loss": 18.6142,
|
35614 |
+
"step": 50750
|
35615 |
+
},
|
35616 |
+
{
|
35617 |
+
"epoch": 0.9419689768196021,
|
35618 |
+
"grad_norm": 35.71875,
|
35619 |
+
"learning_rate": 9.852817390048704e-06,
|
35620 |
+
"loss": 18.4889,
|
35621 |
+
"step": 50760
|
35622 |
+
},
|
35623 |
+
{
|
35624 |
+
"epoch": 0.9421545499040819,
|
35625 |
+
"grad_norm": 36.03125,
|
35626 |
+
"learning_rate": 9.852788394262661e-06,
|
35627 |
+
"loss": 18.0942,
|
35628 |
+
"step": 50770
|
35629 |
+
},
|
35630 |
+
{
|
35631 |
+
"epoch": 0.9423401229885617,
|
35632 |
+
"grad_norm": 38.0625,
|
35633 |
+
"learning_rate": 9.85275939847662e-06,
|
35634 |
+
"loss": 18.5683,
|
35635 |
+
"step": 50780
|
35636 |
+
},
|
35637 |
+
{
|
35638 |
+
"epoch": 0.9425256960730416,
|
35639 |
+
"grad_norm": 37.0625,
|
35640 |
+
"learning_rate": 9.852730402690578e-06,
|
35641 |
+
"loss": 18.7116,
|
35642 |
+
"step": 50790
|
35643 |
+
},
|
35644 |
+
{
|
35645 |
+
"epoch": 0.9427112691575213,
|
35646 |
+
"grad_norm": 34.65625,
|
35647 |
+
"learning_rate": 9.852701406904535e-06,
|
35648 |
+
"loss": 18.6401,
|
35649 |
+
"step": 50800
|
35650 |
+
},
|
35651 |
+
{
|
35652 |
+
"epoch": 0.9428968422420012,
|
35653 |
+
"grad_norm": 36.125,
|
35654 |
+
"learning_rate": 9.852672411118493e-06,
|
35655 |
+
"loss": 19.084,
|
35656 |
+
"step": 50810
|
35657 |
+
},
|
35658 |
+
{
|
35659 |
+
"epoch": 0.9430824153264811,
|
35660 |
+
"grad_norm": 37.34375,
|
35661 |
+
"learning_rate": 9.85264341533245e-06,
|
35662 |
+
"loss": 18.4527,
|
35663 |
+
"step": 50820
|
35664 |
+
},
|
35665 |
+
{
|
35666 |
+
"epoch": 0.9432679884109608,
|
35667 |
+
"grad_norm": 34.96875,
|
35668 |
+
"learning_rate": 9.852614419546408e-06,
|
35669 |
+
"loss": 18.6703,
|
35670 |
+
"step": 50830
|
35671 |
+
},
|
35672 |
+
{
|
35673 |
+
"epoch": 0.9434535614954407,
|
35674 |
+
"grad_norm": 39.15625,
|
35675 |
+
"learning_rate": 9.852585423760365e-06,
|
35676 |
+
"loss": 18.8782,
|
35677 |
+
"step": 50840
|
35678 |
+
},
|
35679 |
+
{
|
35680 |
+
"epoch": 0.9436391345799205,
|
35681 |
+
"grad_norm": 33.625,
|
35682 |
+
"learning_rate": 9.852556427974324e-06,
|
35683 |
+
"loss": 18.3635,
|
35684 |
+
"step": 50850
|
35685 |
+
},
|
35686 |
+
{
|
35687 |
+
"epoch": 0.9438247076644004,
|
35688 |
+
"grad_norm": 38.25,
|
35689 |
+
"learning_rate": 9.852527432188282e-06,
|
35690 |
+
"loss": 18.8192,
|
35691 |
+
"step": 50860
|
35692 |
+
},
|
35693 |
+
{
|
35694 |
+
"epoch": 0.9440102807488802,
|
35695 |
+
"grad_norm": 35.9375,
|
35696 |
+
"learning_rate": 9.852498436402237e-06,
|
35697 |
+
"loss": 18.9524,
|
35698 |
+
"step": 50870
|
35699 |
+
},
|
35700 |
+
{
|
35701 |
+
"epoch": 0.94419585383336,
|
35702 |
+
"grad_norm": 37.84375,
|
35703 |
+
"learning_rate": 9.852469440616196e-06,
|
35704 |
+
"loss": 18.9869,
|
35705 |
+
"step": 50880
|
35706 |
+
},
|
35707 |
+
{
|
35708 |
+
"epoch": 0.9443814269178399,
|
35709 |
+
"grad_norm": 37.4375,
|
35710 |
+
"learning_rate": 9.852440444830154e-06,
|
35711 |
+
"loss": 18.8108,
|
35712 |
+
"step": 50890
|
35713 |
+
},
|
35714 |
+
{
|
35715 |
+
"epoch": 0.9445670000023196,
|
35716 |
+
"grad_norm": 36.1875,
|
35717 |
+
"learning_rate": 9.852411449044111e-06,
|
35718 |
+
"loss": 18.7039,
|
35719 |
+
"step": 50900
|
35720 |
+
},
|
35721 |
+
{
|
35722 |
+
"epoch": 0.9447525730867995,
|
35723 |
+
"grad_norm": 38.59375,
|
35724 |
+
"learning_rate": 9.852382453258069e-06,
|
35725 |
+
"loss": 18.6205,
|
35726 |
+
"step": 50910
|
35727 |
+
},
|
35728 |
+
{
|
35729 |
+
"epoch": 0.9449381461712794,
|
35730 |
+
"grad_norm": 38.09375,
|
35731 |
+
"learning_rate": 9.852353457472026e-06,
|
35732 |
+
"loss": 18.9012,
|
35733 |
+
"step": 50920
|
35734 |
+
},
|
35735 |
+
{
|
35736 |
+
"epoch": 0.9451237192557591,
|
35737 |
+
"grad_norm": 35.03125,
|
35738 |
+
"learning_rate": 9.852324461685983e-06,
|
35739 |
+
"loss": 18.9504,
|
35740 |
+
"step": 50930
|
35741 |
+
},
|
35742 |
+
{
|
35743 |
+
"epoch": 0.945309292340239,
|
35744 |
+
"grad_norm": 36.78125,
|
35745 |
+
"learning_rate": 9.85229546589994e-06,
|
35746 |
+
"loss": 18.4827,
|
35747 |
+
"step": 50940
|
35748 |
+
},
|
35749 |
+
{
|
35750 |
+
"epoch": 0.9454948654247188,
|
35751 |
+
"grad_norm": 37.5625,
|
35752 |
+
"learning_rate": 9.8522664701139e-06,
|
35753 |
+
"loss": 19.0303,
|
35754 |
+
"step": 50950
|
35755 |
+
},
|
35756 |
+
{
|
35757 |
+
"epoch": 0.9456804385091986,
|
35758 |
+
"grad_norm": 38.09375,
|
35759 |
+
"learning_rate": 9.852237474327857e-06,
|
35760 |
+
"loss": 18.2305,
|
35761 |
+
"step": 50960
|
35762 |
+
},
|
35763 |
+
{
|
35764 |
+
"epoch": 0.9458660115936784,
|
35765 |
+
"grad_norm": 35.65625,
|
35766 |
+
"learning_rate": 9.852208478541815e-06,
|
35767 |
+
"loss": 18.5792,
|
35768 |
+
"step": 50970
|
35769 |
+
},
|
35770 |
+
{
|
35771 |
+
"epoch": 0.9460515846781583,
|
35772 |
+
"grad_norm": 38.46875,
|
35773 |
+
"learning_rate": 9.852179482755772e-06,
|
35774 |
+
"loss": 18.5712,
|
35775 |
+
"step": 50980
|
35776 |
+
},
|
35777 |
+
{
|
35778 |
+
"epoch": 0.9462371577626381,
|
35779 |
+
"grad_norm": 37.34375,
|
35780 |
+
"learning_rate": 9.85215048696973e-06,
|
35781 |
+
"loss": 18.6123,
|
35782 |
+
"step": 50990
|
35783 |
+
},
|
35784 |
+
{
|
35785 |
+
"epoch": 0.9464227308471179,
|
35786 |
+
"grad_norm": 37.125,
|
35787 |
+
"learning_rate": 9.852121491183687e-06,
|
35788 |
+
"loss": 18.5901,
|
35789 |
+
"step": 51000
|
35790 |
+
},
|
35791 |
+
{
|
35792 |
+
"epoch": 0.9466083039315978,
|
35793 |
+
"grad_norm": 36.46875,
|
35794 |
+
"learning_rate": 9.852092495397644e-06,
|
35795 |
+
"loss": 19.0493,
|
35796 |
+
"step": 51010
|
35797 |
+
},
|
35798 |
+
{
|
35799 |
+
"epoch": 0.9467938770160775,
|
35800 |
+
"grad_norm": 35.375,
|
35801 |
+
"learning_rate": 9.852063499611603e-06,
|
35802 |
+
"loss": 18.5143,
|
35803 |
+
"step": 51020
|
35804 |
+
},
|
35805 |
+
{
|
35806 |
+
"epoch": 0.9469794501005574,
|
35807 |
+
"grad_norm": 34.375,
|
35808 |
+
"learning_rate": 9.85203450382556e-06,
|
35809 |
+
"loss": 18.7262,
|
35810 |
+
"step": 51030
|
35811 |
+
},
|
35812 |
+
{
|
35813 |
+
"epoch": 0.9471650231850373,
|
35814 |
+
"grad_norm": 35.65625,
|
35815 |
+
"learning_rate": 9.852005508039517e-06,
|
35816 |
+
"loss": 18.5407,
|
35817 |
+
"step": 51040
|
35818 |
+
},
|
35819 |
+
{
|
35820 |
+
"epoch": 0.9473505962695171,
|
35821 |
+
"grad_norm": 35.84375,
|
35822 |
+
"learning_rate": 9.851976512253476e-06,
|
35823 |
+
"loss": 18.7274,
|
35824 |
+
"step": 51050
|
35825 |
+
},
|
35826 |
+
{
|
35827 |
+
"epoch": 0.9475361693539969,
|
35828 |
+
"grad_norm": 35.5,
|
35829 |
+
"learning_rate": 9.851947516467433e-06,
|
35830 |
+
"loss": 18.6067,
|
35831 |
+
"step": 51060
|
35832 |
+
},
|
35833 |
+
{
|
35834 |
+
"epoch": 0.9477217424384767,
|
35835 |
+
"grad_norm": 35.96875,
|
35836 |
+
"learning_rate": 9.85191852068139e-06,
|
35837 |
+
"loss": 18.9094,
|
35838 |
+
"step": 51070
|
35839 |
+
},
|
35840 |
+
{
|
35841 |
+
"epoch": 0.9479073155229566,
|
35842 |
+
"grad_norm": 34.96875,
|
35843 |
+
"learning_rate": 9.851889524895348e-06,
|
35844 |
+
"loss": 18.7977,
|
35845 |
+
"step": 51080
|
35846 |
+
},
|
35847 |
+
{
|
35848 |
+
"epoch": 0.9480928886074363,
|
35849 |
+
"grad_norm": 37.75,
|
35850 |
+
"learning_rate": 9.851860529109305e-06,
|
35851 |
+
"loss": 19.2816,
|
35852 |
+
"step": 51090
|
35853 |
+
},
|
35854 |
+
{
|
35855 |
+
"epoch": 0.9482784616919162,
|
35856 |
+
"grad_norm": 34.4375,
|
35857 |
+
"learning_rate": 9.851831533323263e-06,
|
35858 |
+
"loss": 19.179,
|
35859 |
+
"step": 51100
|
35860 |
+
},
|
35861 |
+
{
|
35862 |
+
"epoch": 0.9484640347763961,
|
35863 |
+
"grad_norm": 35.96875,
|
35864 |
+
"learning_rate": 9.85180253753722e-06,
|
35865 |
+
"loss": 18.4052,
|
35866 |
+
"step": 51110
|
35867 |
+
},
|
35868 |
+
{
|
35869 |
+
"epoch": 0.9486496078608758,
|
35870 |
+
"grad_norm": 36.0,
|
35871 |
+
"learning_rate": 9.85177354175118e-06,
|
35872 |
+
"loss": 18.9433,
|
35873 |
+
"step": 51120
|
35874 |
+
},
|
35875 |
+
{
|
35876 |
+
"epoch": 0.9488351809453557,
|
35877 |
+
"grad_norm": 37.4375,
|
35878 |
+
"learning_rate": 9.851744545965137e-06,
|
35879 |
+
"loss": 19.0316,
|
35880 |
+
"step": 51130
|
35881 |
+
},
|
35882 |
+
{
|
35883 |
+
"epoch": 0.9490207540298355,
|
35884 |
+
"grad_norm": 37.40625,
|
35885 |
+
"learning_rate": 9.851715550179092e-06,
|
35886 |
+
"loss": 18.5718,
|
35887 |
+
"step": 51140
|
35888 |
+
},
|
35889 |
+
{
|
35890 |
+
"epoch": 0.9492063271143153,
|
35891 |
+
"grad_norm": 36.5625,
|
35892 |
+
"learning_rate": 9.851686554393051e-06,
|
35893 |
+
"loss": 18.4393,
|
35894 |
+
"step": 51150
|
35895 |
+
},
|
35896 |
+
{
|
35897 |
+
"epoch": 0.9493919001987952,
|
35898 |
+
"grad_norm": 35.84375,
|
35899 |
+
"learning_rate": 9.851657558607009e-06,
|
35900 |
+
"loss": 18.1548,
|
35901 |
+
"step": 51160
|
35902 |
+
},
|
35903 |
+
{
|
35904 |
+
"epoch": 0.949577473283275,
|
35905 |
+
"grad_norm": 36.40625,
|
35906 |
+
"learning_rate": 9.851628562820966e-06,
|
35907 |
+
"loss": 18.8235,
|
35908 |
+
"step": 51170
|
35909 |
+
},
|
35910 |
+
{
|
35911 |
+
"epoch": 0.9497630463677548,
|
35912 |
+
"grad_norm": 34.90625,
|
35913 |
+
"learning_rate": 9.851599567034924e-06,
|
35914 |
+
"loss": 18.7454,
|
35915 |
+
"step": 51180
|
35916 |
+
},
|
35917 |
+
{
|
35918 |
+
"epoch": 0.9499486194522346,
|
35919 |
+
"grad_norm": 37.21875,
|
35920 |
+
"learning_rate": 9.851570571248881e-06,
|
35921 |
+
"loss": 18.0793,
|
35922 |
+
"step": 51190
|
35923 |
+
},
|
35924 |
+
{
|
35925 |
+
"epoch": 0.9501341925367145,
|
35926 |
+
"grad_norm": 36.125,
|
35927 |
+
"learning_rate": 9.851541575462838e-06,
|
35928 |
+
"loss": 18.5923,
|
35929 |
+
"step": 51200
|
35930 |
+
},
|
35931 |
+
{
|
35932 |
+
"epoch": 0.9503197656211944,
|
35933 |
+
"grad_norm": 37.3125,
|
35934 |
+
"learning_rate": 9.851512579676796e-06,
|
35935 |
+
"loss": 19.1447,
|
35936 |
+
"step": 51210
|
35937 |
+
},
|
35938 |
+
{
|
35939 |
+
"epoch": 0.9505053387056741,
|
35940 |
+
"grad_norm": 35.96875,
|
35941 |
+
"learning_rate": 9.851483583890753e-06,
|
35942 |
+
"loss": 18.39,
|
35943 |
+
"step": 51220
|
35944 |
+
},
|
35945 |
+
{
|
35946 |
+
"epoch": 0.950690911790154,
|
35947 |
+
"grad_norm": 37.96875,
|
35948 |
+
"learning_rate": 9.851454588104712e-06,
|
35949 |
+
"loss": 18.8331,
|
35950 |
+
"step": 51230
|
35951 |
+
},
|
35952 |
+
{
|
35953 |
+
"epoch": 0.9508764848746338,
|
35954 |
+
"grad_norm": 37.1875,
|
35955 |
+
"learning_rate": 9.85142559231867e-06,
|
35956 |
+
"loss": 18.3683,
|
35957 |
+
"step": 51240
|
35958 |
+
},
|
35959 |
+
{
|
35960 |
+
"epoch": 0.9510620579591136,
|
35961 |
+
"grad_norm": 36.4375,
|
35962 |
+
"learning_rate": 9.851396596532627e-06,
|
35963 |
+
"loss": 18.497,
|
35964 |
+
"step": 51250
|
35965 |
+
},
|
35966 |
+
{
|
35967 |
+
"epoch": 0.9512476310435934,
|
35968 |
+
"grad_norm": 37.3125,
|
35969 |
+
"learning_rate": 9.851367600746585e-06,
|
35970 |
+
"loss": 18.5485,
|
35971 |
+
"step": 51260
|
35972 |
+
},
|
35973 |
+
{
|
35974 |
+
"epoch": 0.9514332041280733,
|
35975 |
+
"grad_norm": 37.5625,
|
35976 |
+
"learning_rate": 9.851338604960542e-06,
|
35977 |
+
"loss": 18.4425,
|
35978 |
+
"step": 51270
|
35979 |
+
},
|
35980 |
+
{
|
35981 |
+
"epoch": 0.9516187772125531,
|
35982 |
+
"grad_norm": 37.96875,
|
35983 |
+
"learning_rate": 9.8513096091745e-06,
|
35984 |
+
"loss": 18.5681,
|
35985 |
+
"step": 51280
|
35986 |
+
},
|
35987 |
+
{
|
35988 |
+
"epoch": 0.9518043502970329,
|
35989 |
+
"grad_norm": 36.53125,
|
35990 |
+
"learning_rate": 9.851280613388457e-06,
|
35991 |
+
"loss": 18.5103,
|
35992 |
+
"step": 51290
|
35993 |
+
},
|
35994 |
+
{
|
35995 |
+
"epoch": 0.9519899233815128,
|
35996 |
+
"grad_norm": 35.46875,
|
35997 |
+
"learning_rate": 9.851251617602414e-06,
|
35998 |
+
"loss": 18.7339,
|
35999 |
+
"step": 51300
|
36000 |
+
},
|
36001 |
+
{
|
36002 |
+
"epoch": 0.9521754964659925,
|
36003 |
+
"grad_norm": 35.28125,
|
36004 |
+
"learning_rate": 9.851222621816372e-06,
|
36005 |
+
"loss": 18.6217,
|
36006 |
+
"step": 51310
|
36007 |
+
},
|
36008 |
+
{
|
36009 |
+
"epoch": 0.9523610695504724,
|
36010 |
+
"grad_norm": 37.125,
|
36011 |
+
"learning_rate": 9.851193626030329e-06,
|
36012 |
+
"loss": 18.5986,
|
36013 |
+
"step": 51320
|
36014 |
+
},
|
36015 |
+
{
|
36016 |
+
"epoch": 0.9525466426349523,
|
36017 |
+
"grad_norm": 39.3125,
|
36018 |
+
"learning_rate": 9.851164630244288e-06,
|
36019 |
+
"loss": 18.7082,
|
36020 |
+
"step": 51330
|
36021 |
+
},
|
36022 |
+
{
|
36023 |
+
"epoch": 0.952732215719432,
|
36024 |
+
"grad_norm": 35.78125,
|
36025 |
+
"learning_rate": 9.851135634458246e-06,
|
36026 |
+
"loss": 18.6332,
|
36027 |
+
"step": 51340
|
36028 |
+
},
|
36029 |
+
{
|
36030 |
+
"epoch": 0.9529177888039119,
|
36031 |
+
"grad_norm": 35.78125,
|
36032 |
+
"learning_rate": 9.851106638672201e-06,
|
36033 |
+
"loss": 18.7516,
|
36034 |
+
"step": 51350
|
36035 |
+
},
|
36036 |
+
{
|
36037 |
+
"epoch": 0.9531033618883917,
|
36038 |
+
"grad_norm": 35.75,
|
36039 |
+
"learning_rate": 9.85107764288616e-06,
|
36040 |
+
"loss": 18.8567,
|
36041 |
+
"step": 51360
|
36042 |
+
},
|
36043 |
+
{
|
36044 |
+
"epoch": 0.9532889349728715,
|
36045 |
+
"grad_norm": 35.15625,
|
36046 |
+
"learning_rate": 9.851048647100118e-06,
|
36047 |
+
"loss": 18.5148,
|
36048 |
+
"step": 51370
|
36049 |
+
},
|
36050 |
+
{
|
36051 |
+
"epoch": 0.9534745080573513,
|
36052 |
+
"grad_norm": 37.71875,
|
36053 |
+
"learning_rate": 9.851019651314075e-06,
|
36054 |
+
"loss": 18.6169,
|
36055 |
+
"step": 51380
|
36056 |
+
},
|
36057 |
+
{
|
36058 |
+
"epoch": 0.9536600811418312,
|
36059 |
+
"grad_norm": 36.375,
|
36060 |
+
"learning_rate": 9.850990655528033e-06,
|
36061 |
+
"loss": 18.8565,
|
36062 |
+
"step": 51390
|
36063 |
+
},
|
36064 |
+
{
|
36065 |
+
"epoch": 0.9538456542263111,
|
36066 |
+
"grad_norm": 36.28125,
|
36067 |
+
"learning_rate": 9.850961659741992e-06,
|
36068 |
+
"loss": 18.5506,
|
36069 |
+
"step": 51400
|
36070 |
+
},
|
36071 |
+
{
|
36072 |
+
"epoch": 0.9540312273107908,
|
36073 |
+
"grad_norm": 37.90625,
|
36074 |
+
"learning_rate": 9.850932663955947e-06,
|
36075 |
+
"loss": 18.6192,
|
36076 |
+
"step": 51410
|
36077 |
+
},
|
36078 |
+
{
|
36079 |
+
"epoch": 0.9542168003952707,
|
36080 |
+
"grad_norm": 38.71875,
|
36081 |
+
"learning_rate": 9.850903668169905e-06,
|
36082 |
+
"loss": 18.5314,
|
36083 |
+
"step": 51420
|
36084 |
+
},
|
36085 |
+
{
|
36086 |
+
"epoch": 0.9544023734797505,
|
36087 |
+
"grad_norm": 35.84375,
|
36088 |
+
"learning_rate": 9.850874672383864e-06,
|
36089 |
+
"loss": 18.5046,
|
36090 |
+
"step": 51430
|
36091 |
+
},
|
36092 |
+
{
|
36093 |
+
"epoch": 0.9545879465642303,
|
36094 |
+
"grad_norm": 34.71875,
|
36095 |
+
"learning_rate": 9.850845676597821e-06,
|
36096 |
+
"loss": 18.7414,
|
36097 |
+
"step": 51440
|
36098 |
+
},
|
36099 |
+
{
|
36100 |
+
"epoch": 0.9547735196487102,
|
36101 |
+
"grad_norm": 38.21875,
|
36102 |
+
"learning_rate": 9.850816680811779e-06,
|
36103 |
+
"loss": 18.5634,
|
36104 |
+
"step": 51450
|
36105 |
+
},
|
36106 |
+
{
|
36107 |
+
"epoch": 0.95495909273319,
|
36108 |
+
"grad_norm": 38.0,
|
36109 |
+
"learning_rate": 9.850787685025736e-06,
|
36110 |
+
"loss": 18.6599,
|
36111 |
+
"step": 51460
|
36112 |
+
},
|
36113 |
+
{
|
36114 |
+
"epoch": 0.9551446658176698,
|
36115 |
+
"grad_norm": 36.65625,
|
36116 |
+
"learning_rate": 9.850758689239694e-06,
|
36117 |
+
"loss": 18.7972,
|
36118 |
+
"step": 51470
|
36119 |
+
},
|
36120 |
+
{
|
36121 |
+
"epoch": 0.9553302389021496,
|
36122 |
+
"grad_norm": 38.3125,
|
36123 |
+
"learning_rate": 9.850729693453651e-06,
|
36124 |
+
"loss": 18.6985,
|
36125 |
+
"step": 51480
|
36126 |
+
},
|
36127 |
+
{
|
36128 |
+
"epoch": 0.9555158119866295,
|
36129 |
+
"grad_norm": 36.40625,
|
36130 |
+
"learning_rate": 9.850700697667608e-06,
|
36131 |
+
"loss": 18.596,
|
36132 |
+
"step": 51490
|
36133 |
+
},
|
36134 |
+
{
|
36135 |
+
"epoch": 0.9557013850711092,
|
36136 |
+
"grad_norm": 36.65625,
|
36137 |
+
"learning_rate": 9.850671701881567e-06,
|
36138 |
+
"loss": 18.604,
|
36139 |
+
"step": 51500
|
36140 |
+
},
|
36141 |
+
{
|
36142 |
+
"epoch": 0.9558869581555891,
|
36143 |
+
"grad_norm": 38.15625,
|
36144 |
+
"learning_rate": 9.850642706095523e-06,
|
36145 |
+
"loss": 18.7036,
|
36146 |
+
"step": 51510
|
36147 |
+
},
|
36148 |
+
{
|
36149 |
+
"epoch": 0.956072531240069,
|
36150 |
+
"grad_norm": 37.21875,
|
36151 |
+
"learning_rate": 9.85061371030948e-06,
|
36152 |
+
"loss": 19.1632,
|
36153 |
+
"step": 51520
|
36154 |
+
},
|
36155 |
+
{
|
36156 |
+
"epoch": 0.9562581043245487,
|
36157 |
+
"grad_norm": 34.875,
|
36158 |
+
"learning_rate": 9.85058471452344e-06,
|
36159 |
+
"loss": 18.6759,
|
36160 |
+
"step": 51530
|
36161 |
+
},
|
36162 |
+
{
|
36163 |
+
"epoch": 0.9564436774090286,
|
36164 |
+
"grad_norm": 37.4375,
|
36165 |
+
"learning_rate": 9.850555718737397e-06,
|
36166 |
+
"loss": 18.9442,
|
36167 |
+
"step": 51540
|
36168 |
+
},
|
36169 |
+
{
|
36170 |
+
"epoch": 0.9566292504935084,
|
36171 |
+
"grad_norm": 36.96875,
|
36172 |
+
"learning_rate": 9.850526722951354e-06,
|
36173 |
+
"loss": 18.8462,
|
36174 |
+
"step": 51550
|
36175 |
+
},
|
36176 |
+
{
|
36177 |
+
"epoch": 0.9568148235779882,
|
36178 |
+
"grad_norm": 37.40625,
|
36179 |
+
"learning_rate": 9.850497727165312e-06,
|
36180 |
+
"loss": 18.6035,
|
36181 |
+
"step": 51560
|
36182 |
+
},
|
36183 |
+
{
|
36184 |
+
"epoch": 0.9570003966624681,
|
36185 |
+
"grad_norm": 36.46875,
|
36186 |
+
"learning_rate": 9.85046873137927e-06,
|
36187 |
+
"loss": 18.6443,
|
36188 |
+
"step": 51570
|
36189 |
+
},
|
36190 |
+
{
|
36191 |
+
"epoch": 0.9571859697469479,
|
36192 |
+
"grad_norm": 39.125,
|
36193 |
+
"learning_rate": 9.850439735593227e-06,
|
36194 |
+
"loss": 18.9952,
|
36195 |
+
"step": 51580
|
36196 |
+
},
|
36197 |
+
{
|
36198 |
+
"epoch": 0.9573715428314278,
|
36199 |
+
"grad_norm": 38.1875,
|
36200 |
+
"learning_rate": 9.850410739807184e-06,
|
36201 |
+
"loss": 18.9026,
|
36202 |
+
"step": 51590
|
36203 |
+
},
|
36204 |
+
{
|
36205 |
+
"epoch": 0.9575571159159075,
|
36206 |
+
"grad_norm": 35.5625,
|
36207 |
+
"learning_rate": 9.850381744021143e-06,
|
36208 |
+
"loss": 18.7487,
|
36209 |
+
"step": 51600
|
36210 |
+
},
|
36211 |
+
{
|
36212 |
+
"epoch": 0.9577426890003874,
|
36213 |
+
"grad_norm": 38.0,
|
36214 |
+
"learning_rate": 9.8503527482351e-06,
|
36215 |
+
"loss": 18.9155,
|
36216 |
+
"step": 51610
|
36217 |
+
},
|
36218 |
+
{
|
36219 |
+
"epoch": 0.9579282620848673,
|
36220 |
+
"grad_norm": 34.65625,
|
36221 |
+
"learning_rate": 9.850323752449056e-06,
|
36222 |
+
"loss": 18.8469,
|
36223 |
+
"step": 51620
|
36224 |
+
},
|
36225 |
+
{
|
36226 |
+
"epoch": 0.958113835169347,
|
36227 |
+
"grad_norm": 37.0625,
|
36228 |
+
"learning_rate": 9.850294756663015e-06,
|
36229 |
+
"loss": 18.7591,
|
36230 |
+
"step": 51630
|
36231 |
+
},
|
36232 |
+
{
|
36233 |
+
"epoch": 0.9582994082538269,
|
36234 |
+
"grad_norm": 33.6875,
|
36235 |
+
"learning_rate": 9.850265760876973e-06,
|
36236 |
+
"loss": 18.4839,
|
36237 |
+
"step": 51640
|
36238 |
+
},
|
36239 |
+
{
|
36240 |
+
"epoch": 0.9584849813383067,
|
36241 |
+
"grad_norm": 34.71875,
|
36242 |
+
"learning_rate": 9.85023676509093e-06,
|
36243 |
+
"loss": 18.9344,
|
36244 |
+
"step": 51650
|
36245 |
+
},
|
36246 |
+
{
|
36247 |
+
"epoch": 0.9586705544227865,
|
36248 |
+
"grad_norm": 39.59375,
|
36249 |
+
"learning_rate": 9.850207769304888e-06,
|
36250 |
+
"loss": 18.9862,
|
36251 |
+
"step": 51660
|
36252 |
+
},
|
36253 |
+
{
|
36254 |
+
"epoch": 0.9588561275072663,
|
36255 |
+
"grad_norm": 37.875,
|
36256 |
+
"learning_rate": 9.850178773518845e-06,
|
36257 |
+
"loss": 18.5865,
|
36258 |
+
"step": 51670
|
36259 |
+
},
|
36260 |
+
{
|
36261 |
+
"epoch": 0.9590417005917462,
|
36262 |
+
"grad_norm": 36.8125,
|
36263 |
+
"learning_rate": 9.850149777732802e-06,
|
36264 |
+
"loss": 18.9963,
|
36265 |
+
"step": 51680
|
36266 |
+
},
|
36267 |
+
{
|
36268 |
+
"epoch": 0.959227273676226,
|
36269 |
+
"grad_norm": 37.21875,
|
36270 |
+
"learning_rate": 9.85012078194676e-06,
|
36271 |
+
"loss": 18.5694,
|
36272 |
+
"step": 51690
|
36273 |
+
},
|
36274 |
+
{
|
36275 |
+
"epoch": 0.9594128467607058,
|
36276 |
+
"grad_norm": 37.65625,
|
36277 |
+
"learning_rate": 9.850091786160719e-06,
|
36278 |
+
"loss": 18.5936,
|
36279 |
+
"step": 51700
|
36280 |
+
},
|
36281 |
+
{
|
36282 |
+
"epoch": 0.9595984198451857,
|
36283 |
+
"grad_norm": 38.09375,
|
36284 |
+
"learning_rate": 9.850062790374676e-06,
|
36285 |
+
"loss": 18.8217,
|
36286 |
+
"step": 51710
|
36287 |
+
},
|
36288 |
+
{
|
36289 |
+
"epoch": 0.9597839929296654,
|
36290 |
+
"grad_norm": 35.96875,
|
36291 |
+
"learning_rate": 9.850033794588634e-06,
|
36292 |
+
"loss": 18.8549,
|
36293 |
+
"step": 51720
|
36294 |
+
},
|
36295 |
+
{
|
36296 |
+
"epoch": 0.9599695660141453,
|
36297 |
+
"grad_norm": 36.46875,
|
36298 |
+
"learning_rate": 9.850004798802591e-06,
|
36299 |
+
"loss": 19.1592,
|
36300 |
+
"step": 51730
|
36301 |
+
},
|
36302 |
+
{
|
36303 |
+
"epoch": 0.9601551390986252,
|
36304 |
+
"grad_norm": 36.40625,
|
36305 |
+
"learning_rate": 9.849975803016549e-06,
|
36306 |
+
"loss": 18.723,
|
36307 |
+
"step": 51740
|
36308 |
+
},
|
36309 |
+
{
|
36310 |
+
"epoch": 0.960340712183105,
|
36311 |
+
"grad_norm": 36.5625,
|
36312 |
+
"learning_rate": 9.849946807230506e-06,
|
36313 |
+
"loss": 18.8267,
|
36314 |
+
"step": 51750
|
36315 |
+
},
|
36316 |
+
{
|
36317 |
+
"epoch": 0.9605262852675848,
|
36318 |
+
"grad_norm": 36.78125,
|
36319 |
+
"learning_rate": 9.849917811444463e-06,
|
36320 |
+
"loss": 18.6763,
|
36321 |
+
"step": 51760
|
36322 |
+
},
|
36323 |
+
{
|
36324 |
+
"epoch": 0.9607118583520646,
|
36325 |
+
"grad_norm": 33.90625,
|
36326 |
+
"learning_rate": 9.84988881565842e-06,
|
36327 |
+
"loss": 18.8519,
|
36328 |
+
"step": 51770
|
36329 |
+
},
|
36330 |
+
{
|
36331 |
+
"epoch": 0.9608974314365445,
|
36332 |
+
"grad_norm": 36.59375,
|
36333 |
+
"learning_rate": 9.849859819872378e-06,
|
36334 |
+
"loss": 18.4374,
|
36335 |
+
"step": 51780
|
36336 |
+
},
|
36337 |
+
{
|
36338 |
+
"epoch": 0.9610830045210242,
|
36339 |
+
"grad_norm": 36.4375,
|
36340 |
+
"learning_rate": 9.849830824086336e-06,
|
36341 |
+
"loss": 19.033,
|
36342 |
+
"step": 51790
|
36343 |
+
},
|
36344 |
+
{
|
36345 |
+
"epoch": 0.9612685776055041,
|
36346 |
+
"grad_norm": 36.3125,
|
36347 |
+
"learning_rate": 9.849801828300293e-06,
|
36348 |
+
"loss": 18.6082,
|
36349 |
+
"step": 51800
|
36350 |
+
},
|
36351 |
+
{
|
36352 |
+
"epoch": 0.961454150689984,
|
36353 |
+
"grad_norm": 37.625,
|
36354 |
+
"learning_rate": 9.849772832514252e-06,
|
36355 |
+
"loss": 18.4141,
|
36356 |
+
"step": 51810
|
36357 |
+
},
|
36358 |
+
{
|
36359 |
+
"epoch": 0.9616397237744637,
|
36360 |
+
"grad_norm": 35.34375,
|
36361 |
+
"learning_rate": 9.84974383672821e-06,
|
36362 |
+
"loss": 18.9108,
|
36363 |
+
"step": 51820
|
36364 |
+
},
|
36365 |
+
{
|
36366 |
+
"epoch": 0.9618252968589436,
|
36367 |
+
"grad_norm": 35.8125,
|
36368 |
+
"learning_rate": 9.849714840942167e-06,
|
36369 |
+
"loss": 18.8733,
|
36370 |
+
"step": 51830
|
36371 |
+
},
|
36372 |
+
{
|
36373 |
+
"epoch": 0.9620108699434234,
|
36374 |
+
"grad_norm": 37.21875,
|
36375 |
+
"learning_rate": 9.849685845156124e-06,
|
36376 |
+
"loss": 18.7665,
|
36377 |
+
"step": 51840
|
36378 |
+
},
|
36379 |
+
{
|
36380 |
+
"epoch": 0.9621964430279032,
|
36381 |
+
"grad_norm": 35.4375,
|
36382 |
+
"learning_rate": 9.849656849370082e-06,
|
36383 |
+
"loss": 18.7238,
|
36384 |
+
"step": 51850
|
36385 |
+
},
|
36386 |
+
{
|
36387 |
+
"epoch": 0.962382016112383,
|
36388 |
+
"grad_norm": 34.59375,
|
36389 |
+
"learning_rate": 9.84962785358404e-06,
|
36390 |
+
"loss": 18.8423,
|
36391 |
+
"step": 51860
|
36392 |
+
},
|
36393 |
+
{
|
36394 |
+
"epoch": 0.9625675891968629,
|
36395 |
+
"grad_norm": 36.65625,
|
36396 |
+
"learning_rate": 9.849598857797997e-06,
|
36397 |
+
"loss": 18.623,
|
36398 |
+
"step": 51870
|
36399 |
+
},
|
36400 |
+
{
|
36401 |
+
"epoch": 0.9627531622813427,
|
36402 |
+
"grad_norm": 36.84375,
|
36403 |
+
"learning_rate": 9.849569862011956e-06,
|
36404 |
+
"loss": 18.4343,
|
36405 |
+
"step": 51880
|
36406 |
+
},
|
36407 |
+
{
|
36408 |
+
"epoch": 0.9629387353658225,
|
36409 |
+
"grad_norm": 34.9375,
|
36410 |
+
"learning_rate": 9.849540866225911e-06,
|
36411 |
+
"loss": 18.7979,
|
36412 |
+
"step": 51890
|
36413 |
+
},
|
36414 |
+
{
|
36415 |
+
"epoch": 0.9631243084503024,
|
36416 |
+
"grad_norm": 36.875,
|
36417 |
+
"learning_rate": 9.849511870439869e-06,
|
36418 |
+
"loss": 18.9062,
|
36419 |
+
"step": 51900
|
36420 |
+
},
|
36421 |
+
{
|
36422 |
+
"epoch": 0.9633098815347821,
|
36423 |
+
"grad_norm": 38.59375,
|
36424 |
+
"learning_rate": 9.849482874653828e-06,
|
36425 |
+
"loss": 18.7673,
|
36426 |
+
"step": 51910
|
36427 |
+
},
|
36428 |
+
{
|
36429 |
+
"epoch": 0.963495454619262,
|
36430 |
+
"grad_norm": 36.71875,
|
36431 |
+
"learning_rate": 9.849453878867785e-06,
|
36432 |
+
"loss": 18.5212,
|
36433 |
+
"step": 51920
|
36434 |
+
},
|
36435 |
+
{
|
36436 |
+
"epoch": 0.9636810277037419,
|
36437 |
+
"grad_norm": 36.625,
|
36438 |
+
"learning_rate": 9.849424883081743e-06,
|
36439 |
+
"loss": 18.6261,
|
36440 |
+
"step": 51930
|
36441 |
+
},
|
36442 |
+
{
|
36443 |
+
"epoch": 0.9638666007882217,
|
36444 |
+
"grad_norm": 37.84375,
|
36445 |
+
"learning_rate": 9.8493958872957e-06,
|
36446 |
+
"loss": 19.0427,
|
36447 |
+
"step": 51940
|
36448 |
+
},
|
36449 |
+
{
|
36450 |
+
"epoch": 0.9640521738727015,
|
36451 |
+
"grad_norm": 36.9375,
|
36452 |
+
"learning_rate": 9.849366891509658e-06,
|
36453 |
+
"loss": 18.7989,
|
36454 |
+
"step": 51950
|
36455 |
+
},
|
36456 |
+
{
|
36457 |
+
"epoch": 0.9642377469571813,
|
36458 |
+
"grad_norm": 34.875,
|
36459 |
+
"learning_rate": 9.849337895723615e-06,
|
36460 |
+
"loss": 18.5892,
|
36461 |
+
"step": 51960
|
36462 |
+
},
|
36463 |
+
{
|
36464 |
+
"epoch": 0.9644233200416612,
|
36465 |
+
"grad_norm": 37.0,
|
36466 |
+
"learning_rate": 9.849308899937572e-06,
|
36467 |
+
"loss": 18.9571,
|
36468 |
+
"step": 51970
|
36469 |
+
},
|
36470 |
+
{
|
36471 |
+
"epoch": 0.964608893126141,
|
36472 |
+
"grad_norm": 34.90625,
|
36473 |
+
"learning_rate": 9.849279904151531e-06,
|
36474 |
+
"loss": 18.6608,
|
36475 |
+
"step": 51980
|
36476 |
+
},
|
36477 |
+
{
|
36478 |
+
"epoch": 0.9647944662106208,
|
36479 |
+
"grad_norm": 36.0,
|
36480 |
+
"learning_rate": 9.849250908365489e-06,
|
36481 |
+
"loss": 18.6838,
|
36482 |
+
"step": 51990
|
36483 |
+
},
|
36484 |
+
{
|
36485 |
+
"epoch": 0.9649800392951007,
|
36486 |
+
"grad_norm": 35.96875,
|
36487 |
+
"learning_rate": 9.849221912579445e-06,
|
36488 |
+
"loss": 18.5241,
|
36489 |
+
"step": 52000
|
36490 |
+
},
|
36491 |
+
{
|
36492 |
+
"epoch": 0.9651656123795804,
|
36493 |
+
"grad_norm": 34.5625,
|
36494 |
+
"learning_rate": 9.849192916793404e-06,
|
36495 |
+
"loss": 18.7984,
|
36496 |
+
"step": 52010
|
36497 |
+
},
|
36498 |
+
{
|
36499 |
+
"epoch": 0.9653511854640603,
|
36500 |
+
"grad_norm": 38.59375,
|
36501 |
+
"learning_rate": 9.849163921007361e-06,
|
36502 |
+
"loss": 18.8855,
|
36503 |
+
"step": 52020
|
36504 |
+
},
|
36505 |
+
{
|
36506 |
+
"epoch": 0.9655367585485402,
|
36507 |
+
"grad_norm": 38.34375,
|
36508 |
+
"learning_rate": 9.849134925221319e-06,
|
36509 |
+
"loss": 18.8712,
|
36510 |
+
"step": 52030
|
36511 |
+
},
|
36512 |
+
{
|
36513 |
+
"epoch": 0.9657223316330199,
|
36514 |
+
"grad_norm": 37.5,
|
36515 |
+
"learning_rate": 9.849105929435276e-06,
|
36516 |
+
"loss": 18.4082,
|
36517 |
+
"step": 52040
|
36518 |
+
},
|
36519 |
+
{
|
36520 |
+
"epoch": 0.9659079047174998,
|
36521 |
+
"grad_norm": 35.9375,
|
36522 |
+
"learning_rate": 9.849076933649233e-06,
|
36523 |
+
"loss": 18.8569,
|
36524 |
+
"step": 52050
|
36525 |
+
},
|
36526 |
+
{
|
36527 |
+
"epoch": 0.9660934778019796,
|
36528 |
+
"grad_norm": 35.5,
|
36529 |
+
"learning_rate": 9.84904793786319e-06,
|
36530 |
+
"loss": 19.0493,
|
36531 |
+
"step": 52060
|
36532 |
+
},
|
36533 |
+
{
|
36534 |
+
"epoch": 0.9662790508864594,
|
36535 |
+
"grad_norm": 35.0,
|
36536 |
+
"learning_rate": 9.849018942077148e-06,
|
36537 |
+
"loss": 18.684,
|
36538 |
+
"step": 52070
|
36539 |
+
},
|
36540 |
+
{
|
36541 |
+
"epoch": 0.9664646239709392,
|
36542 |
+
"grad_norm": 36.4375,
|
36543 |
+
"learning_rate": 9.848989946291107e-06,
|
36544 |
+
"loss": 18.4338,
|
36545 |
+
"step": 52080
|
36546 |
+
},
|
36547 |
+
{
|
36548 |
+
"epoch": 0.9666501970554191,
|
36549 |
+
"grad_norm": 37.75,
|
36550 |
+
"learning_rate": 9.848960950505065e-06,
|
36551 |
+
"loss": 19.0642,
|
36552 |
+
"step": 52090
|
36553 |
+
},
|
36554 |
+
{
|
36555 |
+
"epoch": 0.966835770139899,
|
36556 |
+
"grad_norm": 35.84375,
|
36557 |
+
"learning_rate": 9.84893195471902e-06,
|
36558 |
+
"loss": 18.8723,
|
36559 |
+
"step": 52100
|
36560 |
+
},
|
36561 |
+
{
|
36562 |
+
"epoch": 0.9670213432243787,
|
36563 |
+
"grad_norm": 37.125,
|
36564 |
+
"learning_rate": 9.84890295893298e-06,
|
36565 |
+
"loss": 18.7843,
|
36566 |
+
"step": 52110
|
36567 |
+
},
|
36568 |
+
{
|
36569 |
+
"epoch": 0.9672069163088586,
|
36570 |
+
"grad_norm": 36.1875,
|
36571 |
+
"learning_rate": 9.848873963146937e-06,
|
36572 |
+
"loss": 18.6314,
|
36573 |
+
"step": 52120
|
36574 |
+
},
|
36575 |
+
{
|
36576 |
+
"epoch": 0.9673924893933384,
|
36577 |
+
"grad_norm": 34.96875,
|
36578 |
+
"learning_rate": 9.848844967360894e-06,
|
36579 |
+
"loss": 18.5492,
|
36580 |
+
"step": 52130
|
36581 |
+
},
|
36582 |
+
{
|
36583 |
+
"epoch": 0.9675780624778182,
|
36584 |
+
"grad_norm": 35.8125,
|
36585 |
+
"learning_rate": 9.848815971574852e-06,
|
36586 |
+
"loss": 18.4559,
|
36587 |
+
"step": 52140
|
36588 |
+
},
|
36589 |
+
{
|
36590 |
+
"epoch": 0.967763635562298,
|
36591 |
+
"grad_norm": 36.40625,
|
36592 |
+
"learning_rate": 9.848786975788809e-06,
|
36593 |
+
"loss": 18.3422,
|
36594 |
+
"step": 52150
|
36595 |
+
},
|
36596 |
+
{
|
36597 |
+
"epoch": 0.9679492086467779,
|
36598 |
+
"grad_norm": 37.65625,
|
36599 |
+
"learning_rate": 9.848757980002766e-06,
|
36600 |
+
"loss": 18.8372,
|
36601 |
+
"step": 52160
|
36602 |
+
},
|
36603 |
+
{
|
36604 |
+
"epoch": 0.9681347817312577,
|
36605 |
+
"grad_norm": 37.59375,
|
36606 |
+
"learning_rate": 9.848728984216724e-06,
|
36607 |
+
"loss": 18.6309,
|
36608 |
+
"step": 52170
|
36609 |
+
},
|
36610 |
+
{
|
36611 |
+
"epoch": 0.9683203548157375,
|
36612 |
+
"grad_norm": 36.5625,
|
36613 |
+
"learning_rate": 9.848699988430683e-06,
|
36614 |
+
"loss": 18.9326,
|
36615 |
+
"step": 52180
|
36616 |
+
},
|
36617 |
+
{
|
36618 |
+
"epoch": 0.9685059279002174,
|
36619 |
+
"grad_norm": 36.625,
|
36620 |
+
"learning_rate": 9.84867099264464e-06,
|
36621 |
+
"loss": 18.8778,
|
36622 |
+
"step": 52190
|
36623 |
+
},
|
36624 |
+
{
|
36625 |
+
"epoch": 0.9686915009846971,
|
36626 |
+
"grad_norm": 37.6875,
|
36627 |
+
"learning_rate": 9.848641996858598e-06,
|
36628 |
+
"loss": 18.4787,
|
36629 |
+
"step": 52200
|
36630 |
+
},
|
36631 |
+
{
|
36632 |
+
"epoch": 0.968877074069177,
|
36633 |
+
"grad_norm": 35.84375,
|
36634 |
+
"learning_rate": 9.848613001072555e-06,
|
36635 |
+
"loss": 18.6075,
|
36636 |
+
"step": 52210
|
36637 |
+
},
|
36638 |
+
{
|
36639 |
+
"epoch": 0.9690626471536569,
|
36640 |
+
"grad_norm": 37.8125,
|
36641 |
+
"learning_rate": 9.848584005286513e-06,
|
36642 |
+
"loss": 18.5023,
|
36643 |
+
"step": 52220
|
36644 |
+
},
|
36645 |
+
{
|
36646 |
+
"epoch": 0.9692482202381366,
|
36647 |
+
"grad_norm": 35.75,
|
36648 |
+
"learning_rate": 9.84855500950047e-06,
|
36649 |
+
"loss": 18.3154,
|
36650 |
+
"step": 52230
|
36651 |
+
},
|
36652 |
+
{
|
36653 |
+
"epoch": 0.9694337933226165,
|
36654 |
+
"grad_norm": 37.28125,
|
36655 |
+
"learning_rate": 9.848526013714427e-06,
|
36656 |
+
"loss": 19.0297,
|
36657 |
+
"step": 52240
|
36658 |
+
},
|
36659 |
+
{
|
36660 |
+
"epoch": 0.9696193664070963,
|
36661 |
+
"grad_norm": 35.9375,
|
36662 |
+
"learning_rate": 9.848497017928385e-06,
|
36663 |
+
"loss": 18.438,
|
36664 |
+
"step": 52250
|
36665 |
+
},
|
36666 |
+
{
|
36667 |
+
"epoch": 0.9698049394915761,
|
36668 |
+
"grad_norm": 36.53125,
|
36669 |
+
"learning_rate": 9.848468022142342e-06,
|
36670 |
+
"loss": 18.473,
|
36671 |
+
"step": 52260
|
36672 |
+
},
|
36673 |
+
{
|
36674 |
+
"epoch": 0.969990512576056,
|
36675 |
+
"grad_norm": 37.34375,
|
36676 |
+
"learning_rate": 9.8484390263563e-06,
|
36677 |
+
"loss": 19.1365,
|
36678 |
+
"step": 52270
|
36679 |
+
},
|
36680 |
+
{
|
36681 |
+
"epoch": 0.9701760856605358,
|
36682 |
+
"grad_norm": 37.40625,
|
36683 |
+
"learning_rate": 9.848410030570257e-06,
|
36684 |
+
"loss": 18.9908,
|
36685 |
+
"step": 52280
|
36686 |
+
},
|
36687 |
+
{
|
36688 |
+
"epoch": 0.9703616587450157,
|
36689 |
+
"grad_norm": 37.96875,
|
36690 |
+
"learning_rate": 9.848381034784216e-06,
|
36691 |
+
"loss": 19.0627,
|
36692 |
+
"step": 52290
|
36693 |
+
},
|
36694 |
+
{
|
36695 |
+
"epoch": 0.9705472318294954,
|
36696 |
+
"grad_norm": 36.59375,
|
36697 |
+
"learning_rate": 9.848352038998174e-06,
|
36698 |
+
"loss": 18.6671,
|
36699 |
+
"step": 52300
|
36700 |
+
},
|
36701 |
+
{
|
36702 |
+
"epoch": 0.9707328049139753,
|
36703 |
+
"grad_norm": 37.0625,
|
36704 |
+
"learning_rate": 9.848323043212131e-06,
|
36705 |
+
"loss": 18.7139,
|
36706 |
+
"step": 52310
|
36707 |
+
},
|
36708 |
+
{
|
36709 |
+
"epoch": 0.9709183779984552,
|
36710 |
+
"grad_norm": 36.34375,
|
36711 |
+
"learning_rate": 9.848294047426088e-06,
|
36712 |
+
"loss": 18.8432,
|
36713 |
+
"step": 52320
|
36714 |
+
},
|
36715 |
+
{
|
36716 |
+
"epoch": 0.9711039510829349,
|
36717 |
+
"grad_norm": 35.34375,
|
36718 |
+
"learning_rate": 9.848265051640046e-06,
|
36719 |
+
"loss": 18.6022,
|
36720 |
+
"step": 52330
|
36721 |
+
},
|
36722 |
+
{
|
36723 |
+
"epoch": 0.9712895241674148,
|
36724 |
+
"grad_norm": 36.9375,
|
36725 |
+
"learning_rate": 9.848236055854003e-06,
|
36726 |
+
"loss": 18.6221,
|
36727 |
+
"step": 52340
|
36728 |
+
},
|
36729 |
+
{
|
36730 |
+
"epoch": 0.9714750972518946,
|
36731 |
+
"grad_norm": 37.59375,
|
36732 |
+
"learning_rate": 9.84820706006796e-06,
|
36733 |
+
"loss": 18.5104,
|
36734 |
+
"step": 52350
|
36735 |
+
},
|
36736 |
+
{
|
36737 |
+
"epoch": 0.9716606703363744,
|
36738 |
+
"grad_norm": 37.28125,
|
36739 |
+
"learning_rate": 9.84817806428192e-06,
|
36740 |
+
"loss": 18.7772,
|
36741 |
+
"step": 52360
|
36742 |
+
},
|
36743 |
+
{
|
36744 |
+
"epoch": 0.9718462434208542,
|
36745 |
+
"grad_norm": 38.84375,
|
36746 |
+
"learning_rate": 9.848149068495875e-06,
|
36747 |
+
"loss": 18.6431,
|
36748 |
+
"step": 52370
|
36749 |
+
},
|
36750 |
+
{
|
36751 |
+
"epoch": 0.9720318165053341,
|
36752 |
+
"grad_norm": 37.15625,
|
36753 |
+
"learning_rate": 9.848120072709833e-06,
|
36754 |
+
"loss": 18.5793,
|
36755 |
+
"step": 52380
|
36756 |
+
},
|
36757 |
+
{
|
36758 |
+
"epoch": 0.9722173895898139,
|
36759 |
+
"grad_norm": 36.125,
|
36760 |
+
"learning_rate": 9.848091076923792e-06,
|
36761 |
+
"loss": 18.8981,
|
36762 |
+
"step": 52390
|
36763 |
+
},
|
36764 |
+
{
|
36765 |
+
"epoch": 0.9724029626742937,
|
36766 |
+
"grad_norm": 37.28125,
|
36767 |
+
"learning_rate": 9.84806208113775e-06,
|
36768 |
+
"loss": 18.5569,
|
36769 |
+
"step": 52400
|
36770 |
+
},
|
36771 |
+
{
|
36772 |
+
"epoch": 0.9725885357587736,
|
36773 |
+
"grad_norm": 35.8125,
|
36774 |
+
"learning_rate": 9.848033085351707e-06,
|
36775 |
+
"loss": 18.7243,
|
36776 |
+
"step": 52410
|
36777 |
+
},
|
36778 |
+
{
|
36779 |
+
"epoch": 0.9727741088432533,
|
36780 |
+
"grad_norm": 37.4375,
|
36781 |
+
"learning_rate": 9.848004089565664e-06,
|
36782 |
+
"loss": 19.0858,
|
36783 |
+
"step": 52420
|
36784 |
+
},
|
36785 |
+
{
|
36786 |
+
"epoch": 0.9729596819277332,
|
36787 |
+
"grad_norm": 36.5,
|
36788 |
+
"learning_rate": 9.847975093779622e-06,
|
36789 |
+
"loss": 18.5945,
|
36790 |
+
"step": 52430
|
36791 |
+
},
|
36792 |
+
{
|
36793 |
+
"epoch": 0.973145255012213,
|
36794 |
+
"grad_norm": 38.4375,
|
36795 |
+
"learning_rate": 9.847946097993579e-06,
|
36796 |
+
"loss": 18.5578,
|
36797 |
+
"step": 52440
|
36798 |
+
},
|
36799 |
+
{
|
36800 |
+
"epoch": 0.9733308280966928,
|
36801 |
+
"grad_norm": 35.40625,
|
36802 |
+
"learning_rate": 9.847917102207536e-06,
|
36803 |
+
"loss": 18.5476,
|
36804 |
+
"step": 52450
|
36805 |
+
},
|
36806 |
+
{
|
36807 |
+
"epoch": 0.9735164011811727,
|
36808 |
+
"grad_norm": 37.34375,
|
36809 |
+
"learning_rate": 9.847888106421495e-06,
|
36810 |
+
"loss": 18.6942,
|
36811 |
+
"step": 52460
|
36812 |
+
},
|
36813 |
+
{
|
36814 |
+
"epoch": 0.9737019742656525,
|
36815 |
+
"grad_norm": 35.71875,
|
36816 |
+
"learning_rate": 9.847859110635453e-06,
|
36817 |
+
"loss": 18.4927,
|
36818 |
+
"step": 52470
|
36819 |
+
},
|
36820 |
+
{
|
36821 |
+
"epoch": 0.9738875473501324,
|
36822 |
+
"grad_norm": 34.90625,
|
36823 |
+
"learning_rate": 9.847830114849409e-06,
|
36824 |
+
"loss": 18.5665,
|
36825 |
+
"step": 52480
|
36826 |
+
},
|
36827 |
+
{
|
36828 |
+
"epoch": 0.9740731204346121,
|
36829 |
+
"grad_norm": 35.46875,
|
36830 |
+
"learning_rate": 9.847801119063368e-06,
|
36831 |
+
"loss": 18.9818,
|
36832 |
+
"step": 52490
|
36833 |
+
},
|
36834 |
+
{
|
36835 |
+
"epoch": 0.974258693519092,
|
36836 |
+
"grad_norm": 35.90625,
|
36837 |
+
"learning_rate": 9.847772123277325e-06,
|
36838 |
+
"loss": 18.3819,
|
36839 |
+
"step": 52500
|
36840 |
+
},
|
36841 |
+
{
|
36842 |
+
"epoch": 0.9744442666035719,
|
36843 |
+
"grad_norm": 37.40625,
|
36844 |
+
"learning_rate": 9.847743127491283e-06,
|
36845 |
+
"loss": 18.266,
|
36846 |
+
"step": 52510
|
36847 |
+
},
|
36848 |
+
{
|
36849 |
+
"epoch": 0.9746298396880516,
|
36850 |
+
"grad_norm": 36.40625,
|
36851 |
+
"learning_rate": 9.84771413170524e-06,
|
36852 |
+
"loss": 19.177,
|
36853 |
+
"step": 52520
|
36854 |
+
},
|
36855 |
+
{
|
36856 |
+
"epoch": 0.9748154127725315,
|
36857 |
+
"grad_norm": 35.25,
|
36858 |
+
"learning_rate": 9.847685135919197e-06,
|
36859 |
+
"loss": 18.7006,
|
36860 |
+
"step": 52530
|
36861 |
+
},
|
36862 |
+
{
|
36863 |
+
"epoch": 0.9750009858570113,
|
36864 |
+
"grad_norm": 36.75,
|
36865 |
+
"learning_rate": 9.847656140133155e-06,
|
36866 |
+
"loss": 18.8634,
|
36867 |
+
"step": 52540
|
36868 |
+
},
|
36869 |
+
{
|
36870 |
+
"epoch": 0.9751865589414911,
|
36871 |
+
"grad_norm": 36.25,
|
36872 |
+
"learning_rate": 9.847627144347112e-06,
|
36873 |
+
"loss": 18.3737,
|
36874 |
+
"step": 52550
|
36875 |
+
},
|
36876 |
+
{
|
36877 |
+
"epoch": 0.975372132025971,
|
36878 |
+
"grad_norm": 37.21875,
|
36879 |
+
"learning_rate": 9.847598148561071e-06,
|
36880 |
+
"loss": 18.6165,
|
36881 |
+
"step": 52560
|
36882 |
+
},
|
36883 |
+
{
|
36884 |
+
"epoch": 0.9755577051104508,
|
36885 |
+
"grad_norm": 37.03125,
|
36886 |
+
"learning_rate": 9.847569152775029e-06,
|
36887 |
+
"loss": 18.7161,
|
36888 |
+
"step": 52570
|
36889 |
+
},
|
36890 |
+
{
|
36891 |
+
"epoch": 0.9757432781949306,
|
36892 |
+
"grad_norm": 34.65625,
|
36893 |
+
"learning_rate": 9.847540156988986e-06,
|
36894 |
+
"loss": 19.034,
|
36895 |
+
"step": 52580
|
36896 |
+
},
|
36897 |
+
{
|
36898 |
+
"epoch": 0.9759288512794104,
|
36899 |
+
"grad_norm": 37.09375,
|
36900 |
+
"learning_rate": 9.847511161202943e-06,
|
36901 |
+
"loss": 18.5442,
|
36902 |
+
"step": 52590
|
36903 |
+
},
|
36904 |
+
{
|
36905 |
+
"epoch": 0.9761144243638903,
|
36906 |
+
"grad_norm": 36.09375,
|
36907 |
+
"learning_rate": 9.847482165416901e-06,
|
36908 |
+
"loss": 18.6233,
|
36909 |
+
"step": 52600
|
36910 |
+
},
|
36911 |
+
{
|
36912 |
+
"epoch": 0.97629999744837,
|
36913 |
+
"grad_norm": 36.5,
|
36914 |
+
"learning_rate": 9.847453169630858e-06,
|
36915 |
+
"loss": 18.7935,
|
36916 |
+
"step": 52610
|
36917 |
+
},
|
36918 |
+
{
|
36919 |
+
"epoch": 0.9764855705328499,
|
36920 |
+
"grad_norm": 38.0,
|
36921 |
+
"learning_rate": 9.847424173844816e-06,
|
36922 |
+
"loss": 19.0615,
|
36923 |
+
"step": 52620
|
36924 |
+
},
|
36925 |
+
{
|
36926 |
+
"epoch": 0.9766711436173298,
|
36927 |
+
"grad_norm": 36.78125,
|
36928 |
+
"learning_rate": 9.847395178058775e-06,
|
36929 |
+
"loss": 18.2889,
|
36930 |
+
"step": 52630
|
36931 |
+
},
|
36932 |
+
{
|
36933 |
+
"epoch": 0.9768567167018096,
|
36934 |
+
"grad_norm": 37.28125,
|
36935 |
+
"learning_rate": 9.84736618227273e-06,
|
36936 |
+
"loss": 18.338,
|
36937 |
+
"step": 52640
|
36938 |
+
},
|
36939 |
+
{
|
36940 |
+
"epoch": 0.9770422897862894,
|
36941 |
+
"grad_norm": 38.15625,
|
36942 |
+
"learning_rate": 9.847337186486688e-06,
|
36943 |
+
"loss": 18.6097,
|
36944 |
+
"step": 52650
|
36945 |
+
},
|
36946 |
+
{
|
36947 |
+
"epoch": 0.9772278628707692,
|
36948 |
+
"grad_norm": 37.5625,
|
36949 |
+
"learning_rate": 9.847308190700647e-06,
|
36950 |
+
"loss": 18.959,
|
36951 |
+
"step": 52660
|
36952 |
+
},
|
36953 |
+
{
|
36954 |
+
"epoch": 0.9774134359552491,
|
36955 |
+
"grad_norm": 35.9375,
|
36956 |
+
"learning_rate": 9.847279194914604e-06,
|
36957 |
+
"loss": 18.3508,
|
36958 |
+
"step": 52670
|
36959 |
+
},
|
36960 |
+
{
|
36961 |
+
"epoch": 0.9775990090397289,
|
36962 |
+
"grad_norm": 36.65625,
|
36963 |
+
"learning_rate": 9.847250199128562e-06,
|
36964 |
+
"loss": 18.6836,
|
36965 |
+
"step": 52680
|
36966 |
+
},
|
36967 |
+
{
|
36968 |
+
"epoch": 0.9777845821242087,
|
36969 |
+
"grad_norm": 35.6875,
|
36970 |
+
"learning_rate": 9.84722120334252e-06,
|
36971 |
+
"loss": 18.8362,
|
36972 |
+
"step": 52690
|
36973 |
+
},
|
36974 |
+
{
|
36975 |
+
"epoch": 0.9779701552086886,
|
36976 |
+
"grad_norm": 35.78125,
|
36977 |
+
"learning_rate": 9.847192207556477e-06,
|
36978 |
+
"loss": 18.6928,
|
36979 |
+
"step": 52700
|
36980 |
+
},
|
36981 |
+
{
|
36982 |
+
"epoch": 0.9781557282931683,
|
36983 |
+
"grad_norm": 34.125,
|
36984 |
+
"learning_rate": 9.847163211770434e-06,
|
36985 |
+
"loss": 18.8454,
|
36986 |
+
"step": 52710
|
36987 |
+
},
|
36988 |
+
{
|
36989 |
+
"epoch": 0.9783413013776482,
|
36990 |
+
"grad_norm": 35.78125,
|
36991 |
+
"learning_rate": 9.847134215984391e-06,
|
36992 |
+
"loss": 18.703,
|
36993 |
+
"step": 52720
|
36994 |
+
},
|
36995 |
+
{
|
36996 |
+
"epoch": 0.978526874462128,
|
36997 |
+
"grad_norm": 35.25,
|
36998 |
+
"learning_rate": 9.847105220198349e-06,
|
36999 |
+
"loss": 18.9489,
|
37000 |
+
"step": 52730
|
37001 |
+
},
|
37002 |
+
{
|
37003 |
+
"epoch": 0.9787124475466078,
|
37004 |
+
"grad_norm": 36.875,
|
37005 |
+
"learning_rate": 9.847076224412308e-06,
|
37006 |
+
"loss": 19.0491,
|
37007 |
+
"step": 52740
|
37008 |
+
},
|
37009 |
+
{
|
37010 |
+
"epoch": 0.9788980206310877,
|
37011 |
+
"grad_norm": 37.0,
|
37012 |
+
"learning_rate": 9.847047228626264e-06,
|
37013 |
+
"loss": 18.4266,
|
37014 |
+
"step": 52750
|
37015 |
+
},
|
37016 |
+
{
|
37017 |
+
"epoch": 0.9790835937155675,
|
37018 |
+
"grad_norm": 35.84375,
|
37019 |
+
"learning_rate": 9.847018232840223e-06,
|
37020 |
+
"loss": 18.529,
|
37021 |
+
"step": 52760
|
37022 |
+
},
|
37023 |
+
{
|
37024 |
+
"epoch": 0.9792691668000473,
|
37025 |
+
"grad_norm": 36.0625,
|
37026 |
+
"learning_rate": 9.84698923705418e-06,
|
37027 |
+
"loss": 18.3869,
|
37028 |
+
"step": 52770
|
37029 |
+
},
|
37030 |
+
{
|
37031 |
+
"epoch": 0.9794547398845271,
|
37032 |
+
"grad_norm": 38.0,
|
37033 |
+
"learning_rate": 9.846960241268138e-06,
|
37034 |
+
"loss": 18.7703,
|
37035 |
+
"step": 52780
|
37036 |
+
},
|
37037 |
+
{
|
37038 |
+
"epoch": 0.979640312969007,
|
37039 |
+
"grad_norm": 38.09375,
|
37040 |
+
"learning_rate": 9.846931245482095e-06,
|
37041 |
+
"loss": 18.6845,
|
37042 |
+
"step": 52790
|
37043 |
+
},
|
37044 |
+
{
|
37045 |
+
"epoch": 0.9798258860534867,
|
37046 |
+
"grad_norm": 35.96875,
|
37047 |
+
"learning_rate": 9.846902249696052e-06,
|
37048 |
+
"loss": 18.5895,
|
37049 |
+
"step": 52800
|
37050 |
+
},
|
37051 |
+
{
|
37052 |
+
"epoch": 0.9800114591379666,
|
37053 |
+
"grad_norm": 36.75,
|
37054 |
+
"learning_rate": 9.84687325391001e-06,
|
37055 |
+
"loss": 18.6341,
|
37056 |
+
"step": 52810
|
37057 |
+
},
|
37058 |
+
{
|
37059 |
+
"epoch": 0.9801970322224465,
|
37060 |
+
"grad_norm": 38.5625,
|
37061 |
+
"learning_rate": 9.846844258123967e-06,
|
37062 |
+
"loss": 18.6194,
|
37063 |
+
"step": 52820
|
37064 |
+
},
|
37065 |
+
{
|
37066 |
+
"epoch": 0.9803826053069263,
|
37067 |
+
"grad_norm": 36.28125,
|
37068 |
+
"learning_rate": 9.846815262337925e-06,
|
37069 |
+
"loss": 18.7706,
|
37070 |
+
"step": 52830
|
37071 |
+
},
|
37072 |
+
{
|
37073 |
+
"epoch": 0.9805681783914061,
|
37074 |
+
"grad_norm": 37.4375,
|
37075 |
+
"learning_rate": 9.846786266551884e-06,
|
37076 |
+
"loss": 18.7345,
|
37077 |
+
"step": 52840
|
37078 |
+
},
|
37079 |
+
{
|
37080 |
+
"epoch": 0.980753751475886,
|
37081 |
+
"grad_norm": 37.125,
|
37082 |
+
"learning_rate": 9.84675727076584e-06,
|
37083 |
+
"loss": 18.2325,
|
37084 |
+
"step": 52850
|
37085 |
+
},
|
37086 |
+
{
|
37087 |
+
"epoch": 0.9809393245603658,
|
37088 |
+
"grad_norm": 36.53125,
|
37089 |
+
"learning_rate": 9.846728274979797e-06,
|
37090 |
+
"loss": 18.2759,
|
37091 |
+
"step": 52860
|
37092 |
+
},
|
37093 |
+
{
|
37094 |
+
"epoch": 0.9811248976448456,
|
37095 |
+
"grad_norm": 35.5625,
|
37096 |
+
"learning_rate": 9.846699279193756e-06,
|
37097 |
+
"loss": 18.5422,
|
37098 |
+
"step": 52870
|
37099 |
+
},
|
37100 |
+
{
|
37101 |
+
"epoch": 0.9813104707293254,
|
37102 |
+
"grad_norm": 36.21875,
|
37103 |
+
"learning_rate": 9.846670283407713e-06,
|
37104 |
+
"loss": 18.8652,
|
37105 |
+
"step": 52880
|
37106 |
+
},
|
37107 |
+
{
|
37108 |
+
"epoch": 0.9814960438138053,
|
37109 |
+
"grad_norm": 36.78125,
|
37110 |
+
"learning_rate": 9.84664128762167e-06,
|
37111 |
+
"loss": 18.8132,
|
37112 |
+
"step": 52890
|
37113 |
+
},
|
37114 |
+
{
|
37115 |
+
"epoch": 0.981681616898285,
|
37116 |
+
"grad_norm": 34.1875,
|
37117 |
+
"learning_rate": 9.846612291835628e-06,
|
37118 |
+
"loss": 18.7067,
|
37119 |
+
"step": 52900
|
37120 |
+
},
|
37121 |
+
{
|
37122 |
+
"epoch": 0.9818671899827649,
|
37123 |
+
"grad_norm": 38.78125,
|
37124 |
+
"learning_rate": 9.846583296049586e-06,
|
37125 |
+
"loss": 18.8695,
|
37126 |
+
"step": 52910
|
37127 |
+
},
|
37128 |
+
{
|
37129 |
+
"epoch": 0.9820527630672448,
|
37130 |
+
"grad_norm": 36.84375,
|
37131 |
+
"learning_rate": 9.846554300263543e-06,
|
37132 |
+
"loss": 18.9407,
|
37133 |
+
"step": 52920
|
37134 |
+
},
|
37135 |
+
{
|
37136 |
+
"epoch": 0.9822383361517245,
|
37137 |
+
"grad_norm": 37.0,
|
37138 |
+
"learning_rate": 9.8465253044775e-06,
|
37139 |
+
"loss": 18.5586,
|
37140 |
+
"step": 52930
|
37141 |
+
},
|
37142 |
+
{
|
37143 |
+
"epoch": 0.9824239092362044,
|
37144 |
+
"grad_norm": 37.0,
|
37145 |
+
"learning_rate": 9.84649630869146e-06,
|
37146 |
+
"loss": 18.701,
|
37147 |
+
"step": 52940
|
37148 |
+
},
|
37149 |
+
{
|
37150 |
+
"epoch": 0.9826094823206842,
|
37151 |
+
"grad_norm": 33.875,
|
37152 |
+
"learning_rate": 9.846467312905417e-06,
|
37153 |
+
"loss": 18.6175,
|
37154 |
+
"step": 52950
|
37155 |
+
},
|
37156 |
+
{
|
37157 |
+
"epoch": 0.982795055405164,
|
37158 |
+
"grad_norm": 36.78125,
|
37159 |
+
"learning_rate": 9.846438317119373e-06,
|
37160 |
+
"loss": 18.4298,
|
37161 |
+
"step": 52960
|
37162 |
+
},
|
37163 |
+
{
|
37164 |
+
"epoch": 0.9829806284896438,
|
37165 |
+
"grad_norm": 37.75,
|
37166 |
+
"learning_rate": 9.846409321333332e-06,
|
37167 |
+
"loss": 18.3273,
|
37168 |
+
"step": 52970
|
37169 |
+
},
|
37170 |
+
{
|
37171 |
+
"epoch": 0.9831662015741237,
|
37172 |
+
"grad_norm": 36.9375,
|
37173 |
+
"learning_rate": 9.846380325547289e-06,
|
37174 |
+
"loss": 18.5626,
|
37175 |
+
"step": 52980
|
37176 |
+
},
|
37177 |
+
{
|
37178 |
+
"epoch": 0.9833517746586035,
|
37179 |
+
"grad_norm": 36.21875,
|
37180 |
+
"learning_rate": 9.846351329761247e-06,
|
37181 |
+
"loss": 18.6245,
|
37182 |
+
"step": 52990
|
37183 |
+
},
|
37184 |
+
{
|
37185 |
+
"epoch": 0.9835373477430833,
|
37186 |
+
"grad_norm": 35.09375,
|
37187 |
+
"learning_rate": 9.846322333975204e-06,
|
37188 |
+
"loss": 18.2085,
|
37189 |
+
"step": 53000
|
37190 |
+
},
|
37191 |
+
{
|
37192 |
+
"epoch": 0.9837229208275632,
|
37193 |
+
"grad_norm": 37.78125,
|
37194 |
+
"learning_rate": 9.846293338189161e-06,
|
37195 |
+
"loss": 19.0781,
|
37196 |
+
"step": 53010
|
37197 |
+
},
|
37198 |
+
{
|
37199 |
+
"epoch": 0.983908493912043,
|
37200 |
+
"grad_norm": 37.15625,
|
37201 |
+
"learning_rate": 9.846264342403119e-06,
|
37202 |
+
"loss": 18.8288,
|
37203 |
+
"step": 53020
|
37204 |
+
},
|
37205 |
+
{
|
37206 |
+
"epoch": 0.9840940669965228,
|
37207 |
+
"grad_norm": 36.25,
|
37208 |
+
"learning_rate": 9.846235346617076e-06,
|
37209 |
+
"loss": 18.6047,
|
37210 |
+
"step": 53030
|
37211 |
+
},
|
37212 |
+
{
|
37213 |
+
"epoch": 0.9842796400810027,
|
37214 |
+
"grad_norm": 36.3125,
|
37215 |
+
"learning_rate": 9.846206350831035e-06,
|
37216 |
+
"loss": 18.9381,
|
37217 |
+
"step": 53040
|
37218 |
+
},
|
37219 |
+
{
|
37220 |
+
"epoch": 0.9844652131654825,
|
37221 |
+
"grad_norm": 36.4375,
|
37222 |
+
"learning_rate": 9.846177355044993e-06,
|
37223 |
+
"loss": 18.6755,
|
37224 |
+
"step": 53050
|
37225 |
+
},
|
37226 |
+
{
|
37227 |
+
"epoch": 0.9846507862499623,
|
37228 |
+
"grad_norm": 36.59375,
|
37229 |
+
"learning_rate": 9.84614835925895e-06,
|
37230 |
+
"loss": 18.5902,
|
37231 |
+
"step": 53060
|
37232 |
+
},
|
37233 |
+
{
|
37234 |
+
"epoch": 0.9848363593344421,
|
37235 |
+
"grad_norm": 35.90625,
|
37236 |
+
"learning_rate": 9.846119363472907e-06,
|
37237 |
+
"loss": 18.8222,
|
37238 |
+
"step": 53070
|
37239 |
+
},
|
37240 |
+
{
|
37241 |
+
"epoch": 0.985021932418922,
|
37242 |
+
"grad_norm": 36.75,
|
37243 |
+
"learning_rate": 9.846090367686865e-06,
|
37244 |
+
"loss": 18.9657,
|
37245 |
+
"step": 53080
|
37246 |
+
},
|
37247 |
+
{
|
37248 |
+
"epoch": 0.9852075055034017,
|
37249 |
+
"grad_norm": 36.875,
|
37250 |
+
"learning_rate": 9.846061371900822e-06,
|
37251 |
+
"loss": 18.6081,
|
37252 |
+
"step": 53090
|
37253 |
+
},
|
37254 |
+
{
|
37255 |
+
"epoch": 0.9853930785878816,
|
37256 |
+
"grad_norm": 38.75,
|
37257 |
+
"learning_rate": 9.84603237611478e-06,
|
37258 |
+
"loss": 19.0012,
|
37259 |
+
"step": 53100
|
37260 |
+
},
|
37261 |
+
{
|
37262 |
+
"epoch": 0.9855786516723615,
|
37263 |
+
"grad_norm": 35.40625,
|
37264 |
+
"learning_rate": 9.846003380328739e-06,
|
37265 |
+
"loss": 18.3228,
|
37266 |
+
"step": 53110
|
37267 |
+
},
|
37268 |
+
{
|
37269 |
+
"epoch": 0.9857642247568412,
|
37270 |
+
"grad_norm": 36.65625,
|
37271 |
+
"learning_rate": 9.845974384542695e-06,
|
37272 |
+
"loss": 18.4046,
|
37273 |
+
"step": 53120
|
37274 |
+
},
|
37275 |
+
{
|
37276 |
+
"epoch": 0.9859497978413211,
|
37277 |
+
"grad_norm": 36.21875,
|
37278 |
+
"learning_rate": 9.845945388756652e-06,
|
37279 |
+
"loss": 18.3394,
|
37280 |
+
"step": 53130
|
37281 |
+
},
|
37282 |
+
{
|
37283 |
+
"epoch": 0.986135370925801,
|
37284 |
+
"grad_norm": 36.96875,
|
37285 |
+
"learning_rate": 9.845916392970611e-06,
|
37286 |
+
"loss": 18.5615,
|
37287 |
+
"step": 53140
|
37288 |
+
},
|
37289 |
+
{
|
37290 |
+
"epoch": 0.9863209440102807,
|
37291 |
+
"grad_norm": 35.1875,
|
37292 |
+
"learning_rate": 9.845887397184568e-06,
|
37293 |
+
"loss": 18.6555,
|
37294 |
+
"step": 53150
|
37295 |
+
},
|
37296 |
+
{
|
37297 |
+
"epoch": 0.9865065170947606,
|
37298 |
+
"grad_norm": 38.15625,
|
37299 |
+
"learning_rate": 9.845858401398526e-06,
|
37300 |
+
"loss": 19.2572,
|
37301 |
+
"step": 53160
|
37302 |
+
},
|
37303 |
+
{
|
37304 |
+
"epoch": 0.9866920901792404,
|
37305 |
+
"grad_norm": 37.21875,
|
37306 |
+
"learning_rate": 9.845829405612483e-06,
|
37307 |
+
"loss": 18.6147,
|
37308 |
+
"step": 53170
|
37309 |
+
},
|
37310 |
+
{
|
37311 |
+
"epoch": 0.9868776632637203,
|
37312 |
+
"grad_norm": 35.09375,
|
37313 |
+
"learning_rate": 9.84580040982644e-06,
|
37314 |
+
"loss": 18.649,
|
37315 |
+
"step": 53180
|
37316 |
+
},
|
37317 |
+
{
|
37318 |
+
"epoch": 0.9870632363482,
|
37319 |
+
"grad_norm": 34.28125,
|
37320 |
+
"learning_rate": 9.845771414040398e-06,
|
37321 |
+
"loss": 18.5839,
|
37322 |
+
"step": 53190
|
37323 |
+
},
|
37324 |
+
{
|
37325 |
+
"epoch": 0.9872488094326799,
|
37326 |
+
"grad_norm": 37.71875,
|
37327 |
+
"learning_rate": 9.845742418254355e-06,
|
37328 |
+
"loss": 18.9422,
|
37329 |
+
"step": 53200
|
37330 |
+
},
|
37331 |
+
{
|
37332 |
+
"epoch": 0.9874343825171598,
|
37333 |
+
"grad_norm": 38.6875,
|
37334 |
+
"learning_rate": 9.845713422468315e-06,
|
37335 |
+
"loss": 19.0014,
|
37336 |
+
"step": 53210
|
37337 |
+
},
|
37338 |
+
{
|
37339 |
+
"epoch": 0.9876199556016395,
|
37340 |
+
"grad_norm": 38.03125,
|
37341 |
+
"learning_rate": 9.845684426682272e-06,
|
37342 |
+
"loss": 18.5322,
|
37343 |
+
"step": 53220
|
37344 |
+
},
|
37345 |
+
{
|
37346 |
+
"epoch": 0.9878055286861194,
|
37347 |
+
"grad_norm": 37.0,
|
37348 |
+
"learning_rate": 9.845655430896228e-06,
|
37349 |
+
"loss": 18.8219,
|
37350 |
+
"step": 53230
|
37351 |
+
},
|
37352 |
+
{
|
37353 |
+
"epoch": 0.9879911017705992,
|
37354 |
+
"grad_norm": 36.375,
|
37355 |
+
"learning_rate": 9.845626435110187e-06,
|
37356 |
+
"loss": 18.5651,
|
37357 |
+
"step": 53240
|
37358 |
+
},
|
37359 |
+
{
|
37360 |
+
"epoch": 0.988176674855079,
|
37361 |
+
"grad_norm": 35.09375,
|
37362 |
+
"learning_rate": 9.845597439324144e-06,
|
37363 |
+
"loss": 18.3392,
|
37364 |
+
"step": 53250
|
37365 |
+
},
|
37366 |
+
{
|
37367 |
+
"epoch": 0.9883622479395588,
|
37368 |
+
"grad_norm": 37.65625,
|
37369 |
+
"learning_rate": 9.845568443538102e-06,
|
37370 |
+
"loss": 19.2046,
|
37371 |
+
"step": 53260
|
37372 |
+
},
|
37373 |
+
{
|
37374 |
+
"epoch": 0.9885478210240387,
|
37375 |
+
"grad_norm": 36.3125,
|
37376 |
+
"learning_rate": 9.845539447752059e-06,
|
37377 |
+
"loss": 18.752,
|
37378 |
+
"step": 53270
|
37379 |
+
},
|
37380 |
+
{
|
37381 |
+
"epoch": 0.9887333941085185,
|
37382 |
+
"grad_norm": 37.09375,
|
37383 |
+
"learning_rate": 9.845510451966016e-06,
|
37384 |
+
"loss": 18.4617,
|
37385 |
+
"step": 53280
|
37386 |
+
},
|
37387 |
+
{
|
37388 |
+
"epoch": 0.9889189671929983,
|
37389 |
+
"grad_norm": 37.125,
|
37390 |
+
"learning_rate": 9.845481456179974e-06,
|
37391 |
+
"loss": 18.4353,
|
37392 |
+
"step": 53290
|
37393 |
+
},
|
37394 |
+
{
|
37395 |
+
"epoch": 0.9891045402774782,
|
37396 |
+
"grad_norm": 37.375,
|
37397 |
+
"learning_rate": 9.845452460393931e-06,
|
37398 |
+
"loss": 18.8832,
|
37399 |
+
"step": 53300
|
37400 |
+
},
|
37401 |
+
{
|
37402 |
+
"epoch": 0.9892901133619579,
|
37403 |
+
"grad_norm": 35.3125,
|
37404 |
+
"learning_rate": 9.845423464607889e-06,
|
37405 |
+
"loss": 19.0743,
|
37406 |
+
"step": 53310
|
37407 |
+
},
|
37408 |
+
{
|
37409 |
+
"epoch": 0.9894756864464378,
|
37410 |
+
"grad_norm": 35.78125,
|
37411 |
+
"learning_rate": 9.845394468821848e-06,
|
37412 |
+
"loss": 18.8587,
|
37413 |
+
"step": 53320
|
37414 |
+
},
|
37415 |
+
{
|
37416 |
+
"epoch": 0.9896612595309177,
|
37417 |
+
"grad_norm": 35.21875,
|
37418 |
+
"learning_rate": 9.845365473035805e-06,
|
37419 |
+
"loss": 18.5995,
|
37420 |
+
"step": 53330
|
37421 |
+
},
|
37422 |
+
{
|
37423 |
+
"epoch": 0.9898468326153974,
|
37424 |
+
"grad_norm": 37.40625,
|
37425 |
+
"learning_rate": 9.845336477249763e-06,
|
37426 |
+
"loss": 18.2105,
|
37427 |
+
"step": 53340
|
37428 |
+
},
|
37429 |
+
{
|
37430 |
+
"epoch": 0.9900324056998773,
|
37431 |
+
"grad_norm": 35.1875,
|
37432 |
+
"learning_rate": 9.84530748146372e-06,
|
37433 |
+
"loss": 18.6503,
|
37434 |
+
"step": 53350
|
37435 |
+
},
|
37436 |
+
{
|
37437 |
+
"epoch": 0.9902179787843571,
|
37438 |
+
"grad_norm": 37.09375,
|
37439 |
+
"learning_rate": 9.845278485677677e-06,
|
37440 |
+
"loss": 18.6598,
|
37441 |
+
"step": 53360
|
37442 |
+
},
|
37443 |
+
{
|
37444 |
+
"epoch": 0.990403551868837,
|
37445 |
+
"grad_norm": 36.21875,
|
37446 |
+
"learning_rate": 9.845249489891635e-06,
|
37447 |
+
"loss": 18.2827,
|
37448 |
+
"step": 53370
|
37449 |
+
},
|
37450 |
+
{
|
37451 |
+
"epoch": 0.9905891249533167,
|
37452 |
+
"grad_norm": 36.03125,
|
37453 |
+
"learning_rate": 9.845220494105592e-06,
|
37454 |
+
"loss": 18.5182,
|
37455 |
+
"step": 53380
|
37456 |
+
},
|
37457 |
+
{
|
37458 |
+
"epoch": 0.9907746980377966,
|
37459 |
+
"grad_norm": 37.5,
|
37460 |
+
"learning_rate": 9.84519149831955e-06,
|
37461 |
+
"loss": 18.4391,
|
37462 |
+
"step": 53390
|
37463 |
+
},
|
37464 |
+
{
|
37465 |
+
"epoch": 0.9909602711222765,
|
37466 |
+
"grad_norm": 37.21875,
|
37467 |
+
"learning_rate": 9.845162502533507e-06,
|
37468 |
+
"loss": 19.1688,
|
37469 |
+
"step": 53400
|
37470 |
+
},
|
37471 |
+
{
|
37472 |
+
"epoch": 0.9911458442067562,
|
37473 |
+
"grad_norm": 37.375,
|
37474 |
+
"learning_rate": 9.845133506747464e-06,
|
37475 |
+
"loss": 18.4897,
|
37476 |
+
"step": 53410
|
37477 |
+
},
|
37478 |
+
{
|
37479 |
+
"epoch": 0.9913314172912361,
|
37480 |
+
"grad_norm": 37.90625,
|
37481 |
+
"learning_rate": 9.845104510961424e-06,
|
37482 |
+
"loss": 19.3897,
|
37483 |
+
"step": 53420
|
37484 |
+
},
|
37485 |
+
{
|
37486 |
+
"epoch": 0.991516990375716,
|
37487 |
+
"grad_norm": 36.90625,
|
37488 |
+
"learning_rate": 9.845075515175381e-06,
|
37489 |
+
"loss": 18.5943,
|
37490 |
+
"step": 53430
|
37491 |
+
},
|
37492 |
+
{
|
37493 |
+
"epoch": 0.9917025634601957,
|
37494 |
+
"grad_norm": 36.65625,
|
37495 |
+
"learning_rate": 9.845046519389337e-06,
|
37496 |
+
"loss": 19.0475,
|
37497 |
+
"step": 53440
|
37498 |
+
},
|
37499 |
+
{
|
37500 |
+
"epoch": 0.9918881365446756,
|
37501 |
+
"grad_norm": 38.96875,
|
37502 |
+
"learning_rate": 9.845017523603296e-06,
|
37503 |
+
"loss": 18.6567,
|
37504 |
+
"step": 53450
|
37505 |
+
},
|
37506 |
+
{
|
37507 |
+
"epoch": 0.9920737096291554,
|
37508 |
+
"grad_norm": 37.5625,
|
37509 |
+
"learning_rate": 9.844988527817253e-06,
|
37510 |
+
"loss": 18.2615,
|
37511 |
+
"step": 53460
|
37512 |
+
},
|
37513 |
+
{
|
37514 |
+
"epoch": 0.9922592827136352,
|
37515 |
+
"grad_norm": 36.125,
|
37516 |
+
"learning_rate": 9.84495953203121e-06,
|
37517 |
+
"loss": 18.7222,
|
37518 |
+
"step": 53470
|
37519 |
+
},
|
37520 |
+
{
|
37521 |
+
"epoch": 0.992444855798115,
|
37522 |
+
"grad_norm": 37.46875,
|
37523 |
+
"learning_rate": 9.844930536245168e-06,
|
37524 |
+
"loss": 18.6064,
|
37525 |
+
"step": 53480
|
37526 |
+
},
|
37527 |
+
{
|
37528 |
+
"epoch": 0.9926304288825949,
|
37529 |
+
"grad_norm": 35.5625,
|
37530 |
+
"learning_rate": 9.844901540459127e-06,
|
37531 |
+
"loss": 18.9255,
|
37532 |
+
"step": 53490
|
37533 |
+
},
|
37534 |
+
{
|
37535 |
+
"epoch": 0.9928160019670746,
|
37536 |
+
"grad_norm": 37.0625,
|
37537 |
+
"learning_rate": 9.844872544673083e-06,
|
37538 |
+
"loss": 18.2984,
|
37539 |
+
"step": 53500
|
37540 |
+
},
|
37541 |
+
{
|
37542 |
+
"epoch": 0.9930015750515545,
|
37543 |
+
"grad_norm": 35.96875,
|
37544 |
+
"learning_rate": 9.84484354888704e-06,
|
37545 |
+
"loss": 18.5695,
|
37546 |
+
"step": 53510
|
37547 |
+
},
|
37548 |
+
{
|
37549 |
+
"epoch": 0.9931871481360344,
|
37550 |
+
"grad_norm": 37.21875,
|
37551 |
+
"learning_rate": 9.844814553101e-06,
|
37552 |
+
"loss": 18.8148,
|
37553 |
+
"step": 53520
|
37554 |
+
},
|
37555 |
+
{
|
37556 |
+
"epoch": 0.9933727212205142,
|
37557 |
+
"grad_norm": 35.28125,
|
37558 |
+
"learning_rate": 9.844785557314957e-06,
|
37559 |
+
"loss": 18.9416,
|
37560 |
+
"step": 53530
|
37561 |
+
},
|
37562 |
+
{
|
37563 |
+
"epoch": 0.993558294304994,
|
37564 |
+
"grad_norm": 35.96875,
|
37565 |
+
"learning_rate": 9.844756561528914e-06,
|
37566 |
+
"loss": 18.6266,
|
37567 |
+
"step": 53540
|
37568 |
+
},
|
37569 |
+
{
|
37570 |
+
"epoch": 0.9937438673894738,
|
37571 |
+
"grad_norm": 39.75,
|
37572 |
+
"learning_rate": 9.844727565742871e-06,
|
37573 |
+
"loss": 19.1788,
|
37574 |
+
"step": 53550
|
37575 |
+
},
|
37576 |
+
{
|
37577 |
+
"epoch": 0.9939294404739537,
|
37578 |
+
"grad_norm": 36.03125,
|
37579 |
+
"learning_rate": 9.844698569956829e-06,
|
37580 |
+
"loss": 18.8975,
|
37581 |
+
"step": 53560
|
37582 |
+
},
|
37583 |
+
{
|
37584 |
+
"epoch": 0.9941150135584335,
|
37585 |
+
"grad_norm": 36.0625,
|
37586 |
+
"learning_rate": 9.844669574170786e-06,
|
37587 |
+
"loss": 18.9393,
|
37588 |
+
"step": 53570
|
37589 |
+
},
|
37590 |
+
{
|
37591 |
+
"epoch": 0.9943005866429133,
|
37592 |
+
"grad_norm": 39.28125,
|
37593 |
+
"learning_rate": 9.844640578384744e-06,
|
37594 |
+
"loss": 18.5879,
|
37595 |
+
"step": 53580
|
37596 |
+
},
|
37597 |
+
{
|
37598 |
+
"epoch": 0.9944861597273932,
|
37599 |
+
"grad_norm": 36.46875,
|
37600 |
+
"learning_rate": 9.844611582598703e-06,
|
37601 |
+
"loss": 18.9418,
|
37602 |
+
"step": 53590
|
37603 |
+
},
|
37604 |
+
{
|
37605 |
+
"epoch": 0.9946717328118729,
|
37606 |
+
"grad_norm": 36.28125,
|
37607 |
+
"learning_rate": 9.844582586812659e-06,
|
37608 |
+
"loss": 18.719,
|
37609 |
+
"step": 53600
|
37610 |
+
},
|
37611 |
+
{
|
37612 |
+
"epoch": 0.9948573058963528,
|
37613 |
+
"grad_norm": 38.40625,
|
37614 |
+
"learning_rate": 9.844553591026616e-06,
|
37615 |
+
"loss": 18.7769,
|
37616 |
+
"step": 53610
|
37617 |
+
},
|
37618 |
+
{
|
37619 |
+
"epoch": 0.9950428789808327,
|
37620 |
+
"grad_norm": 35.125,
|
37621 |
+
"learning_rate": 9.844524595240575e-06,
|
37622 |
+
"loss": 18.7225,
|
37623 |
+
"step": 53620
|
37624 |
+
},
|
37625 |
+
{
|
37626 |
+
"epoch": 0.9952284520653124,
|
37627 |
+
"grad_norm": 36.21875,
|
37628 |
+
"learning_rate": 9.844495599454532e-06,
|
37629 |
+
"loss": 18.6759,
|
37630 |
+
"step": 53630
|
37631 |
+
},
|
37632 |
+
{
|
37633 |
+
"epoch": 0.9954140251497923,
|
37634 |
+
"grad_norm": 37.34375,
|
37635 |
+
"learning_rate": 9.84446660366849e-06,
|
37636 |
+
"loss": 18.6277,
|
37637 |
+
"step": 53640
|
37638 |
+
},
|
37639 |
+
{
|
37640 |
+
"epoch": 0.9955995982342721,
|
37641 |
+
"grad_norm": 38.28125,
|
37642 |
+
"learning_rate": 9.844437607882447e-06,
|
37643 |
+
"loss": 18.8499,
|
37644 |
+
"step": 53650
|
37645 |
+
},
|
37646 |
+
{
|
37647 |
+
"epoch": 0.9957851713187519,
|
37648 |
+
"grad_norm": 37.21875,
|
37649 |
+
"learning_rate": 9.844408612096405e-06,
|
37650 |
+
"loss": 18.6598,
|
37651 |
+
"step": 53660
|
37652 |
+
},
|
37653 |
+
{
|
37654 |
+
"epoch": 0.9959707444032317,
|
37655 |
+
"grad_norm": 36.4375,
|
37656 |
+
"learning_rate": 9.844379616310362e-06,
|
37657 |
+
"loss": 19.1049,
|
37658 |
+
"step": 53670
|
37659 |
+
},
|
37660 |
+
{
|
37661 |
+
"epoch": 0.9961563174877116,
|
37662 |
+
"grad_norm": 38.28125,
|
37663 |
+
"learning_rate": 9.84435062052432e-06,
|
37664 |
+
"loss": 18.3496,
|
37665 |
+
"step": 53680
|
37666 |
+
},
|
37667 |
+
{
|
37668 |
+
"epoch": 0.9963418905721914,
|
37669 |
+
"grad_norm": 37.5,
|
37670 |
+
"learning_rate": 9.844321624738279e-06,
|
37671 |
+
"loss": 18.6458,
|
37672 |
+
"step": 53690
|
37673 |
+
},
|
37674 |
+
{
|
37675 |
+
"epoch": 0.9965274636566712,
|
37676 |
+
"grad_norm": 35.90625,
|
37677 |
+
"learning_rate": 9.844292628952236e-06,
|
37678 |
+
"loss": 18.5307,
|
37679 |
+
"step": 53700
|
37680 |
+
},
|
37681 |
+
{
|
37682 |
+
"epoch": 0.9967130367411511,
|
37683 |
+
"grad_norm": 36.53125,
|
37684 |
+
"learning_rate": 9.844263633166192e-06,
|
37685 |
+
"loss": 19.2581,
|
37686 |
+
"step": 53710
|
37687 |
+
},
|
37688 |
+
{
|
37689 |
+
"epoch": 0.9968986098256309,
|
37690 |
+
"grad_norm": 36.25,
|
37691 |
+
"learning_rate": 9.84423463738015e-06,
|
37692 |
+
"loss": 18.803,
|
37693 |
+
"step": 53720
|
37694 |
+
},
|
37695 |
+
{
|
37696 |
+
"epoch": 0.9970841829101107,
|
37697 |
+
"grad_norm": 37.1875,
|
37698 |
+
"learning_rate": 9.844205641594108e-06,
|
37699 |
+
"loss": 18.2188,
|
37700 |
+
"step": 53730
|
37701 |
+
},
|
37702 |
+
{
|
37703 |
+
"epoch": 0.9972697559945906,
|
37704 |
+
"grad_norm": 36.8125,
|
37705 |
+
"learning_rate": 9.844176645808066e-06,
|
37706 |
+
"loss": 18.7958,
|
37707 |
+
"step": 53740
|
37708 |
+
},
|
37709 |
+
{
|
37710 |
+
"epoch": 0.9974553290790704,
|
37711 |
+
"grad_norm": 33.96875,
|
37712 |
+
"learning_rate": 9.844147650022023e-06,
|
37713 |
+
"loss": 18.6348,
|
37714 |
+
"step": 53750
|
37715 |
+
},
|
37716 |
+
{
|
37717 |
+
"epoch": 0.9976409021635502,
|
37718 |
+
"grad_norm": 35.96875,
|
37719 |
+
"learning_rate": 9.84411865423598e-06,
|
37720 |
+
"loss": 18.5909,
|
37721 |
+
"step": 53760
|
37722 |
+
},
|
37723 |
+
{
|
37724 |
+
"epoch": 0.99782647524803,
|
37725 |
+
"grad_norm": 36.0625,
|
37726 |
+
"learning_rate": 9.844089658449938e-06,
|
37727 |
+
"loss": 19.1494,
|
37728 |
+
"step": 53770
|
37729 |
+
},
|
37730 |
+
{
|
37731 |
+
"epoch": 0.9980120483325099,
|
37732 |
+
"grad_norm": 35.6875,
|
37733 |
+
"learning_rate": 9.844060662663895e-06,
|
37734 |
+
"loss": 18.9692,
|
37735 |
+
"step": 53780
|
37736 |
+
},
|
37737 |
+
{
|
37738 |
+
"epoch": 0.9981976214169896,
|
37739 |
+
"grad_norm": 36.0,
|
37740 |
+
"learning_rate": 9.844031666877853e-06,
|
37741 |
+
"loss": 18.5912,
|
37742 |
+
"step": 53790
|
37743 |
+
},
|
37744 |
+
{
|
37745 |
+
"epoch": 0.9983831945014695,
|
37746 |
+
"grad_norm": 33.34375,
|
37747 |
+
"learning_rate": 9.844002671091812e-06,
|
37748 |
+
"loss": 18.6809,
|
37749 |
+
"step": 53800
|
37750 |
+
},
|
37751 |
+
{
|
37752 |
+
"epoch": 0.9985687675859494,
|
37753 |
+
"grad_norm": 36.0625,
|
37754 |
+
"learning_rate": 9.84397367530577e-06,
|
37755 |
+
"loss": 18.3271,
|
37756 |
+
"step": 53810
|
37757 |
+
},
|
37758 |
+
{
|
37759 |
+
"epoch": 0.9987543406704291,
|
37760 |
+
"grad_norm": 37.28125,
|
37761 |
+
"learning_rate": 9.843944679519727e-06,
|
37762 |
+
"loss": 18.652,
|
37763 |
+
"step": 53820
|
37764 |
+
},
|
37765 |
+
{
|
37766 |
+
"epoch": 0.998939913754909,
|
37767 |
+
"grad_norm": 38.375,
|
37768 |
+
"learning_rate": 9.843915683733684e-06,
|
37769 |
+
"loss": 18.4857,
|
37770 |
+
"step": 53830
|
37771 |
+
},
|
37772 |
+
{
|
37773 |
+
"epoch": 0.9991254868393888,
|
37774 |
+
"grad_norm": 37.53125,
|
37775 |
+
"learning_rate": 9.843886687947641e-06,
|
37776 |
+
"loss": 18.413,
|
37777 |
+
"step": 53840
|
37778 |
+
},
|
37779 |
+
{
|
37780 |
+
"epoch": 0.9993110599238686,
|
37781 |
+
"grad_norm": 35.71875,
|
37782 |
+
"learning_rate": 9.843857692161599e-06,
|
37783 |
+
"loss": 18.8629,
|
37784 |
+
"step": 53850
|
37785 |
+
},
|
37786 |
+
{
|
37787 |
+
"epoch": 0.9994966330083485,
|
37788 |
+
"grad_norm": 35.84375,
|
37789 |
+
"learning_rate": 9.843828696375556e-06,
|
37790 |
+
"loss": 18.0011,
|
37791 |
+
"step": 53860
|
37792 |
+
},
|
37793 |
+
{
|
37794 |
+
"epoch": 0.9996822060928283,
|
37795 |
+
"grad_norm": 35.1875,
|
37796 |
+
"learning_rate": 9.843799700589514e-06,
|
37797 |
+
"loss": 18.2267,
|
37798 |
+
"step": 53870
|
37799 |
+
},
|
37800 |
+
{
|
37801 |
+
"epoch": 0.9998677791773081,
|
37802 |
+
"grad_norm": 36.34375,
|
37803 |
+
"learning_rate": 9.843770704803471e-06,
|
37804 |
+
"loss": 18.813,
|
37805 |
+
"step": 53880
|
37806 |
}
|
37807 |
],
|
37808 |
"logging_steps": 10,
|
|
|
37817 |
"should_evaluate": false,
|
37818 |
"should_log": false,
|
37819 |
"should_save": true,
|
37820 |
+
"should_training_stop": true
|
37821 |
},
|
37822 |
"attributes": {}
|
37823 |
}
|
37824 |
},
|
37825 |
+
"total_flos": 9.406098491698053e+18,
|
37826 |
"train_batch_size": 8,
|
37827 |
"trial_name": null,
|
37828 |
"trial_params": null
|