Training in progress, step 50000, checkpoint
- last-checkpoint/model.safetensors +1 -1
- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/rng_state_4.pth +1 -1
- last-checkpoint/rng_state_5.pth +1 -1
- last-checkpoint/rng_state_6.pth +1 -1
- last-checkpoint/rng_state_7.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +3511 -3
last-checkpoint/model.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:42e515690f5aa44788037bc691fc50cda62efdc1e9ec95468ccfc1c14ddc5921
 size 306619286
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:d73eb6c224b233f29f674cdd64d26045c1f7d11e30ce8854b7e7b89712608f32
 size 919972410
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:1a2fbcd26bac3ea7dc02fc9ede5b8a1914ca51611473722a11a969e1f26ac0ee
 size 15984
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:66d97b511d2fdb8061e5bf72c139923941c148260fac1caedd654028da6986c1
 size 15984
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:3839473129eb8c438ab312370daa55eb10a0790f33d38fc5eaa24859b54b0d1f
 size 15984
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:5088a0d34c7015afe60457fbb3f0a4740839369017a42ea4b3250322c2d63ceb
 size 15984
last-checkpoint/rng_state_4.pth
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:f9cac0eb25286b75549fa2030810940adf357064a83facaf5c58ebe37190b6ac
 size 15984
last-checkpoint/rng_state_5.pth
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:f0a57d29811122d52bd53f81af680412b91dde1cd2a12fa885d8a54388be8e2d
 size 15984
last-checkpoint/rng_state_6.pth
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:2c90ab29b255eaf920ecc1cba0b586e426f8e2db67b44a65576693f84178a04f
 size 15984
last-checkpoint/rng_state_7.pth
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:d4efbfa3cfb1bb8fb9c3380e65959a8b4eaf3bceb0507a26ffba1a3e4636ddb1
 size 15984
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:3f4a7a81ed03f103247da707b419a2fc41f93cae3cdeaa774ea677c3726570ee
 size 1000
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
 {
 "best_metric": null,
 "best_model_checkpoint": null,
-"epoch": 0.
 "eval_steps": 5000,
-"global_step":
 "is_hyper_param_search": false,
 "is_local_process_zero": true,
 "is_world_process_zero": true,
@@ -31579,6 +31579,3514 @@
 "eval_samples_per_second": 3194.103,
 "eval_steps_per_second": 49.909,
 "step": 45000
 }
 ],
 "logging_steps": 10,
@@ -31598,7 +35106,7 @@
 "attributes": {}
 }
 },
-"total_flos":
 "train_batch_size": 8,
 "trial_name": null,
 "trial_params": null
 {
 "best_metric": null,
 "best_model_checkpoint": null,
+"epoch": 0.9278654223991353,
 "eval_steps": 5000,
+"global_step": 50000,
 "is_hyper_param_search": false,
 "is_local_process_zero": true,
 "is_world_process_zero": true,
|
|
 "eval_samples_per_second": 3194.103,
 "eval_steps_per_second": 49.909,
 "step": 45000
31582 |
+
},
|
31583 |
+
{
|
31584 |
+
"epoch": 0.8352644532437016,
|
31585 |
+
"grad_norm": 35.46875,
|
31586 |
+
"learning_rate": 9.869489967023093e-06,
|
31587 |
+
"loss": 18.7658,
|
31588 |
+
"step": 45010
|
31589 |
+
},
|
31590 |
+
{
|
31591 |
+
"epoch": 0.8354500263281813,
|
31592 |
+
"grad_norm": 38.0625,
|
31593 |
+
"learning_rate": 9.86946097123705e-06,
|
31594 |
+
"loss": 19.191,
|
31595 |
+
"step": 45020
|
31596 |
+
},
|
31597 |
+
{
|
31598 |
+
"epoch": 0.8356355994126612,
|
31599 |
+
"grad_norm": 35.5625,
|
31600 |
+
"learning_rate": 9.869431975451008e-06,
|
31601 |
+
"loss": 19.1063,
|
31602 |
+
"step": 45030
|
31603 |
+
},
|
31604 |
+
{
|
31605 |
+
"epoch": 0.835821172497141,
|
31606 |
+
"grad_norm": 36.21875,
|
31607 |
+
"learning_rate": 9.869402979664966e-06,
|
31608 |
+
"loss": 19.1574,
|
31609 |
+
"step": 45040
|
31610 |
+
},
|
31611 |
+
{
|
31612 |
+
"epoch": 0.8360067455816208,
|
31613 |
+
"grad_norm": 36.3125,
|
31614 |
+
"learning_rate": 9.869373983878923e-06,
|
31615 |
+
"loss": 18.7662,
|
31616 |
+
"step": 45050
|
31617 |
+
},
|
31618 |
+
{
|
31619 |
+
"epoch": 0.8361923186661007,
|
31620 |
+
"grad_norm": 34.78125,
|
31621 |
+
"learning_rate": 9.869344988092882e-06,
|
31622 |
+
"loss": 19.5668,
|
31623 |
+
"step": 45060
|
31624 |
+
},
|
31625 |
+
{
|
31626 |
+
"epoch": 0.8363778917505805,
|
31627 |
+
"grad_norm": 37.59375,
|
31628 |
+
"learning_rate": 9.86931599230684e-06,
|
31629 |
+
"loss": 18.7352,
|
31630 |
+
"step": 45070
|
31631 |
+
},
|
31632 |
+
{
|
31633 |
+
"epoch": 0.8365634648350603,
|
31634 |
+
"grad_norm": 35.15625,
|
31635 |
+
"learning_rate": 9.869286996520795e-06,
|
31636 |
+
"loss": 18.9398,
|
31637 |
+
"step": 45080
|
31638 |
+
},
|
31639 |
+
{
|
31640 |
+
"epoch": 0.8367490379195401,
|
31641 |
+
"grad_norm": 37.15625,
|
31642 |
+
"learning_rate": 9.869258000734754e-06,
|
31643 |
+
"loss": 18.3637,
|
31644 |
+
"step": 45090
|
31645 |
+
},
|
31646 |
+
{
|
31647 |
+
"epoch": 0.83693461100402,
|
31648 |
+
"grad_norm": 37.28125,
|
31649 |
+
"learning_rate": 9.869229004948712e-06,
|
31650 |
+
"loss": 18.9402,
|
31651 |
+
"step": 45100
|
31652 |
+
},
|
31653 |
+
{
|
31654 |
+
"epoch": 0.8371201840884998,
|
31655 |
+
"grad_norm": 36.21875,
|
31656 |
+
"learning_rate": 9.869200009162669e-06,
|
31657 |
+
"loss": 18.9655,
|
31658 |
+
"step": 45110
|
31659 |
+
},
|
31660 |
+
{
|
31661 |
+
"epoch": 0.8373057571729796,
|
31662 |
+
"grad_norm": 36.03125,
|
31663 |
+
"learning_rate": 9.869171013376627e-06,
|
31664 |
+
"loss": 19.3164,
|
31665 |
+
"step": 45120
|
31666 |
+
},
|
31667 |
+
{
|
31668 |
+
"epoch": 0.8374913302574595,
|
31669 |
+
"grad_norm": 37.5,
|
31670 |
+
"learning_rate": 9.869142017590584e-06,
|
31671 |
+
"loss": 19.1801,
|
31672 |
+
"step": 45130
|
31673 |
+
},
|
31674 |
+
{
|
31675 |
+
"epoch": 0.8376769033419393,
|
31676 |
+
"grad_norm": 34.65625,
|
31677 |
+
"learning_rate": 9.869113021804541e-06,
|
31678 |
+
"loss": 19.3652,
|
31679 |
+
"step": 45140
|
31680 |
+
},
|
31681 |
+
{
|
31682 |
+
"epoch": 0.8378624764264191,
|
31683 |
+
"grad_norm": 37.5,
|
31684 |
+
"learning_rate": 9.869084026018499e-06,
|
31685 |
+
"loss": 18.9179,
|
31686 |
+
"step": 45150
|
31687 |
+
},
|
31688 |
+
{
|
31689 |
+
"epoch": 0.838048049510899,
|
31690 |
+
"grad_norm": 35.875,
|
31691 |
+
"learning_rate": 9.869055030232458e-06,
|
31692 |
+
"loss": 19.1069,
|
31693 |
+
"step": 45160
|
31694 |
+
},
|
31695 |
+
{
|
31696 |
+
"epoch": 0.8382336225953788,
|
31697 |
+
"grad_norm": 32.875,
|
31698 |
+
"learning_rate": 9.869026034446415e-06,
|
31699 |
+
"loss": 18.7498,
|
31700 |
+
"step": 45170
|
31701 |
+
},
|
31702 |
+
{
|
31703 |
+
"epoch": 0.8384191956798586,
|
31704 |
+
"grad_norm": 36.15625,
|
31705 |
+
"learning_rate": 9.868997038660373e-06,
|
31706 |
+
"loss": 19.555,
|
31707 |
+
"step": 45180
|
31708 |
+
},
|
31709 |
+
{
|
31710 |
+
"epoch": 0.8386047687643384,
|
31711 |
+
"grad_norm": 34.9375,
|
31712 |
+
"learning_rate": 9.86896804287433e-06,
|
31713 |
+
"loss": 18.9055,
|
31714 |
+
"step": 45190
|
31715 |
+
},
|
31716 |
+
{
|
31717 |
+
"epoch": 0.8387903418488183,
|
31718 |
+
"grad_norm": 36.3125,
|
31719 |
+
"learning_rate": 9.868939047088287e-06,
|
31720 |
+
"loss": 18.6655,
|
31721 |
+
"step": 45200
|
31722 |
+
},
|
31723 |
+
{
|
31724 |
+
"epoch": 0.838975914933298,
|
31725 |
+
"grad_norm": 35.5,
|
31726 |
+
"learning_rate": 9.868910051302245e-06,
|
31727 |
+
"loss": 19.2085,
|
31728 |
+
"step": 45210
|
31729 |
+
},
|
31730 |
+
{
|
31731 |
+
"epoch": 0.8391614880177779,
|
31732 |
+
"grad_norm": 36.75,
|
31733 |
+
"learning_rate": 9.868881055516202e-06,
|
31734 |
+
"loss": 19.3331,
|
31735 |
+
"step": 45220
|
31736 |
+
},
|
31737 |
+
{
|
31738 |
+
"epoch": 0.8393470611022578,
|
31739 |
+
"grad_norm": 35.3125,
|
31740 |
+
"learning_rate": 9.868852059730161e-06,
|
31741 |
+
"loss": 19.1247,
|
31742 |
+
"step": 45230
|
31743 |
+
},
|
31744 |
+
{
|
31745 |
+
"epoch": 0.8395326341867375,
|
31746 |
+
"grad_norm": 36.59375,
|
31747 |
+
"learning_rate": 9.868823063944117e-06,
|
31748 |
+
"loss": 19.0293,
|
31749 |
+
"step": 45240
|
31750 |
+
},
|
31751 |
+
{
|
31752 |
+
"epoch": 0.8397182072712174,
|
31753 |
+
"grad_norm": 34.65625,
|
31754 |
+
"learning_rate": 9.868794068158074e-06,
|
31755 |
+
"loss": 18.9617,
|
31756 |
+
"step": 45250
|
31757 |
+
},
|
31758 |
+
{
|
31759 |
+
"epoch": 0.8399037803556972,
|
31760 |
+
"grad_norm": 34.28125,
|
31761 |
+
"learning_rate": 9.868765072372034e-06,
|
31762 |
+
"loss": 18.8723,
|
31763 |
+
"step": 45260
|
31764 |
+
},
|
31765 |
+
{
|
31766 |
+
"epoch": 0.840089353440177,
|
31767 |
+
"grad_norm": 37.09375,
|
31768 |
+
"learning_rate": 9.868736076585991e-06,
|
31769 |
+
"loss": 19.0723,
|
31770 |
+
"step": 45270
|
31771 |
+
},
|
31772 |
+
{
|
31773 |
+
"epoch": 0.8402749265246569,
|
31774 |
+
"grad_norm": 36.46875,
|
31775 |
+
"learning_rate": 9.868707080799948e-06,
|
31776 |
+
"loss": 18.9186,
|
31777 |
+
"step": 45280
|
31778 |
+
},
|
31779 |
+
{
|
31780 |
+
"epoch": 0.8404604996091367,
|
31781 |
+
"grad_norm": 37.03125,
|
31782 |
+
"learning_rate": 9.868678085013906e-06,
|
31783 |
+
"loss": 19.2951,
|
31784 |
+
"step": 45290
|
31785 |
+
},
|
31786 |
+
{
|
31787 |
+
"epoch": 0.8406460726936165,
|
31788 |
+
"grad_norm": 35.71875,
|
31789 |
+
"learning_rate": 9.868649089227863e-06,
|
31790 |
+
"loss": 18.891,
|
31791 |
+
"step": 45300
|
31792 |
+
},
|
31793 |
+
{
|
31794 |
+
"epoch": 0.8408316457780963,
|
31795 |
+
"grad_norm": 38.75,
|
31796 |
+
"learning_rate": 9.86862009344182e-06,
|
31797 |
+
"loss": 19.1318,
|
31798 |
+
"step": 45310
|
31799 |
+
},
|
31800 |
+
{
|
31801 |
+
"epoch": 0.8410172188625762,
|
31802 |
+
"grad_norm": 36.0625,
|
31803 |
+
"learning_rate": 9.868591097655778e-06,
|
31804 |
+
"loss": 18.8914,
|
31805 |
+
"step": 45320
|
31806 |
+
},
|
31807 |
+
{
|
31808 |
+
"epoch": 0.841202791947056,
|
31809 |
+
"grad_norm": 33.65625,
|
31810 |
+
"learning_rate": 9.868562101869737e-06,
|
31811 |
+
"loss": 18.6757,
|
31812 |
+
"step": 45330
|
31813 |
+
},
|
31814 |
+
{
|
31815 |
+
"epoch": 0.8413883650315358,
|
31816 |
+
"grad_norm": 37.125,
|
31817 |
+
"learning_rate": 9.868533106083695e-06,
|
31818 |
+
"loss": 19.2032,
|
31819 |
+
"step": 45340
|
31820 |
+
},
|
31821 |
+
{
|
31822 |
+
"epoch": 0.8415739381160157,
|
31823 |
+
"grad_norm": 34.6875,
|
31824 |
+
"learning_rate": 9.86850411029765e-06,
|
31825 |
+
"loss": 18.8749,
|
31826 |
+
"step": 45350
|
31827 |
+
},
|
31828 |
+
{
|
31829 |
+
"epoch": 0.8417595112004955,
|
31830 |
+
"grad_norm": 35.65625,
|
31831 |
+
"learning_rate": 9.86847511451161e-06,
|
31832 |
+
"loss": 19.2826,
|
31833 |
+
"step": 45360
|
31834 |
+
},
|
31835 |
+
{
|
31836 |
+
"epoch": 0.8419450842849753,
|
31837 |
+
"grad_norm": 36.875,
|
31838 |
+
"learning_rate": 9.868446118725567e-06,
|
31839 |
+
"loss": 18.9139,
|
31840 |
+
"step": 45370
|
31841 |
+
},
|
31842 |
+
{
|
31843 |
+
"epoch": 0.8421306573694551,
|
31844 |
+
"grad_norm": 38.0,
|
31845 |
+
"learning_rate": 9.868417122939524e-06,
|
31846 |
+
"loss": 18.9206,
|
31847 |
+
"step": 45380
|
31848 |
+
},
|
31849 |
+
{
|
31850 |
+
"epoch": 0.842316230453935,
|
31851 |
+
"grad_norm": 35.6875,
|
31852 |
+
"learning_rate": 9.868388127153482e-06,
|
31853 |
+
"loss": 18.9808,
|
31854 |
+
"step": 45390
|
31855 |
+
},
|
31856 |
+
{
|
31857 |
+
"epoch": 0.8425018035384148,
|
31858 |
+
"grad_norm": 34.4375,
|
31859 |
+
"learning_rate": 9.868359131367439e-06,
|
31860 |
+
"loss": 18.8303,
|
31861 |
+
"step": 45400
|
31862 |
+
},
|
31863 |
+
{
|
31864 |
+
"epoch": 0.8426873766228946,
|
31865 |
+
"grad_norm": 36.5625,
|
31866 |
+
"learning_rate": 9.868330135581396e-06,
|
31867 |
+
"loss": 18.862,
|
31868 |
+
"step": 45410
|
31869 |
+
},
|
31870 |
+
{
|
31871 |
+
"epoch": 0.8428729497073745,
|
31872 |
+
"grad_norm": 37.1875,
|
31873 |
+
"learning_rate": 9.868301139795354e-06,
|
31874 |
+
"loss": 19.1974,
|
31875 |
+
"step": 45420
|
31876 |
+
},
|
31877 |
+
{
|
31878 |
+
"epoch": 0.8430585227918542,
|
31879 |
+
"grad_norm": 36.78125,
|
31880 |
+
"learning_rate": 9.868272144009313e-06,
|
31881 |
+
"loss": 19.5463,
|
31882 |
+
"step": 45430
|
31883 |
+
},
|
31884 |
+
{
|
31885 |
+
"epoch": 0.8432440958763341,
|
31886 |
+
"grad_norm": 34.59375,
|
31887 |
+
"learning_rate": 9.86824314822327e-06,
|
31888 |
+
"loss": 18.7967,
|
31889 |
+
"step": 45440
|
31890 |
+
},
|
31891 |
+
{
|
31892 |
+
"epoch": 0.843429668960814,
|
31893 |
+
"grad_norm": 36.75,
|
31894 |
+
"learning_rate": 9.868214152437226e-06,
|
31895 |
+
"loss": 19.0146,
|
31896 |
+
"step": 45450
|
31897 |
+
},
|
31898 |
+
{
|
31899 |
+
"epoch": 0.8436152420452937,
|
31900 |
+
"grad_norm": 36.65625,
|
31901 |
+
"learning_rate": 9.868185156651185e-06,
|
31902 |
+
"loss": 18.5762,
|
31903 |
+
"step": 45460
|
31904 |
+
},
|
31905 |
+
{
|
31906 |
+
"epoch": 0.8438008151297736,
|
31907 |
+
"grad_norm": 38.625,
|
31908 |
+
"learning_rate": 9.868156160865143e-06,
|
31909 |
+
"loss": 19.1253,
|
31910 |
+
"step": 45470
|
31911 |
+
},
|
31912 |
+
{
|
31913 |
+
"epoch": 0.8439863882142534,
|
31914 |
+
"grad_norm": 35.96875,
|
31915 |
+
"learning_rate": 9.8681271650791e-06,
|
31916 |
+
"loss": 18.8985,
|
31917 |
+
"step": 45480
|
31918 |
+
},
|
31919 |
+
{
|
31920 |
+
"epoch": 0.8441719612987333,
|
31921 |
+
"grad_norm": 36.625,
|
31922 |
+
"learning_rate": 9.868098169293057e-06,
|
31923 |
+
"loss": 18.4796,
|
31924 |
+
"step": 45490
|
31925 |
+
},
|
31926 |
+
{
|
31927 |
+
"epoch": 0.844357534383213,
|
31928 |
+
"grad_norm": 35.5625,
|
31929 |
+
"learning_rate": 9.868069173507015e-06,
|
31930 |
+
"loss": 18.9359,
|
31931 |
+
"step": 45500
|
31932 |
+
},
|
31933 |
+
{
|
31934 |
+
"epoch": 0.8445431074676929,
|
31935 |
+
"grad_norm": 35.125,
|
31936 |
+
"learning_rate": 9.868040177720972e-06,
|
31937 |
+
"loss": 19.2384,
|
31938 |
+
"step": 45510
|
31939 |
+
},
|
31940 |
+
{
|
31941 |
+
"epoch": 0.8447286805521728,
|
31942 |
+
"grad_norm": 38.3125,
|
31943 |
+
"learning_rate": 9.86801118193493e-06,
|
31944 |
+
"loss": 18.8187,
|
31945 |
+
"step": 45520
|
31946 |
+
},
|
31947 |
+
{
|
31948 |
+
"epoch": 0.8449142536366525,
|
31949 |
+
"grad_norm": 37.375,
|
31950 |
+
"learning_rate": 9.867982186148887e-06,
|
31951 |
+
"loss": 19.2896,
|
31952 |
+
"step": 45530
|
31953 |
+
},
|
31954 |
+
{
|
31955 |
+
"epoch": 0.8450998267211324,
|
31956 |
+
"grad_norm": 34.875,
|
31957 |
+
"learning_rate": 9.867953190362846e-06,
|
31958 |
+
"loss": 18.9661,
|
31959 |
+
"step": 45540
|
31960 |
+
},
|
31961 |
+
{
|
31962 |
+
"epoch": 0.8452853998056122,
|
31963 |
+
"grad_norm": 34.15625,
|
31964 |
+
"learning_rate": 9.867924194576803e-06,
|
31965 |
+
"loss": 19.0476,
|
31966 |
+
"step": 45550
|
31967 |
+
},
|
31968 |
+
{
|
31969 |
+
"epoch": 0.845470972890092,
|
31970 |
+
"grad_norm": 36.8125,
|
31971 |
+
"learning_rate": 9.867895198790761e-06,
|
31972 |
+
"loss": 19.2686,
|
31973 |
+
"step": 45560
|
31974 |
+
},
|
31975 |
+
{
|
31976 |
+
"epoch": 0.8456565459745718,
|
31977 |
+
"grad_norm": 36.5,
|
31978 |
+
"learning_rate": 9.867866203004718e-06,
|
31979 |
+
"loss": 19.37,
|
31980 |
+
"step": 45570
|
31981 |
+
},
|
31982 |
+
{
|
31983 |
+
"epoch": 0.8458421190590517,
|
31984 |
+
"grad_norm": 35.9375,
|
31985 |
+
"learning_rate": 9.867837207218676e-06,
|
31986 |
+
"loss": 19.5579,
|
31987 |
+
"step": 45580
|
31988 |
+
},
|
31989 |
+
{
|
31990 |
+
"epoch": 0.8460276921435315,
|
31991 |
+
"grad_norm": 35.6875,
|
31992 |
+
"learning_rate": 9.867808211432633e-06,
|
31993 |
+
"loss": 19.0715,
|
31994 |
+
"step": 45590
|
31995 |
+
},
|
31996 |
+
{
|
31997 |
+
"epoch": 0.8462132652280113,
|
31998 |
+
"grad_norm": 35.375,
|
31999 |
+
"learning_rate": 9.86777921564659e-06,
|
32000 |
+
"loss": 19.0432,
|
32001 |
+
"step": 45600
|
32002 |
+
},
|
32003 |
+
{
|
32004 |
+
"epoch": 0.8463988383124912,
|
32005 |
+
"grad_norm": 36.875,
|
32006 |
+
"learning_rate": 9.867750219860548e-06,
|
32007 |
+
"loss": 18.8202,
|
32008 |
+
"step": 45610
|
32009 |
+
},
|
32010 |
+
{
|
32011 |
+
"epoch": 0.8465844113969709,
|
32012 |
+
"grad_norm": 34.375,
|
32013 |
+
"learning_rate": 9.867721224074505e-06,
|
32014 |
+
"loss": 18.784,
|
32015 |
+
"step": 45620
|
32016 |
+
},
|
32017 |
+
{
|
32018 |
+
"epoch": 0.8467699844814508,
|
32019 |
+
"grad_norm": 36.0,
|
32020 |
+
"learning_rate": 9.867692228288463e-06,
|
32021 |
+
"loss": 18.8642,
|
32022 |
+
"step": 45630
|
32023 |
+
},
|
32024 |
+
{
|
32025 |
+
"epoch": 0.8469555575659307,
|
32026 |
+
"grad_norm": 36.875,
|
32027 |
+
"learning_rate": 9.867663232502422e-06,
|
32028 |
+
"loss": 19.0021,
|
32029 |
+
"step": 45640
|
32030 |
+
},
|
32031 |
+
{
|
32032 |
+
"epoch": 0.8471411306504104,
|
32033 |
+
"grad_norm": 35.09375,
|
32034 |
+
"learning_rate": 9.86763423671638e-06,
|
32035 |
+
"loss": 18.3599,
|
32036 |
+
"step": 45650
|
32037 |
+
},
|
32038 |
+
{
|
32039 |
+
"epoch": 0.8473267037348903,
|
32040 |
+
"grad_norm": 36.59375,
|
32041 |
+
"learning_rate": 9.867605240930337e-06,
|
32042 |
+
"loss": 18.9329,
|
32043 |
+
"step": 45660
|
32044 |
+
},
|
32045 |
+
{
|
32046 |
+
"epoch": 0.8475122768193701,
|
32047 |
+
"grad_norm": 36.9375,
|
32048 |
+
"learning_rate": 9.867576245144294e-06,
|
32049 |
+
"loss": 18.6595,
|
32050 |
+
"step": 45670
|
32051 |
+
},
|
32052 |
+
{
|
32053 |
+
"epoch": 0.84769784990385,
|
32054 |
+
"grad_norm": 35.96875,
|
32055 |
+
"learning_rate": 9.867547249358251e-06,
|
32056 |
+
"loss": 19.3835,
|
32057 |
+
"step": 45680
|
32058 |
+
},
|
32059 |
+
{
|
32060 |
+
"epoch": 0.8478834229883297,
|
32061 |
+
"grad_norm": 36.875,
|
32062 |
+
"learning_rate": 9.867518253572209e-06,
|
32063 |
+
"loss": 18.894,
|
32064 |
+
"step": 45690
|
32065 |
+
},
|
32066 |
+
{
|
32067 |
+
"epoch": 0.8480689960728096,
|
32068 |
+
"grad_norm": 35.40625,
|
32069 |
+
"learning_rate": 9.867489257786166e-06,
|
32070 |
+
"loss": 18.9299,
|
32071 |
+
"step": 45700
|
32072 |
+
},
|
32073 |
+
{
|
32074 |
+
"epoch": 0.8482545691572895,
|
32075 |
+
"grad_norm": 37.46875,
|
32076 |
+
"learning_rate": 9.867460262000125e-06,
|
32077 |
+
"loss": 19.3052,
|
32078 |
+
"step": 45710
|
32079 |
+
},
|
32080 |
+
{
|
32081 |
+
"epoch": 0.8484401422417692,
|
32082 |
+
"grad_norm": 34.59375,
|
32083 |
+
"learning_rate": 9.867431266214081e-06,
|
32084 |
+
"loss": 18.9821,
|
32085 |
+
"step": 45720
|
32086 |
+
},
|
32087 |
+
{
|
32088 |
+
"epoch": 0.8486257153262491,
|
32089 |
+
"grad_norm": 36.3125,
|
32090 |
+
"learning_rate": 9.867402270428039e-06,
|
32091 |
+
"loss": 18.9228,
|
32092 |
+
"step": 45730
|
32093 |
+
},
|
32094 |
+
{
|
32095 |
+
"epoch": 0.848811288410729,
|
32096 |
+
"grad_norm": 35.90625,
|
32097 |
+
"learning_rate": 9.867373274641998e-06,
|
32098 |
+
"loss": 19.1518,
|
32099 |
+
"step": 45740
|
32100 |
+
},
|
32101 |
+
{
|
32102 |
+
"epoch": 0.8489968614952087,
|
32103 |
+
"grad_norm": 36.4375,
|
32104 |
+
"learning_rate": 9.867344278855955e-06,
|
32105 |
+
"loss": 18.6011,
|
32106 |
+
"step": 45750
|
32107 |
+
},
|
32108 |
+
{
|
32109 |
+
"epoch": 0.8491824345796886,
|
32110 |
+
"grad_norm": 34.75,
|
32111 |
+
"learning_rate": 9.867315283069912e-06,
|
32112 |
+
"loss": 18.7753,
|
32113 |
+
"step": 45760
|
32114 |
+
},
|
32115 |
+
{
|
32116 |
+
"epoch": 0.8493680076641684,
|
32117 |
+
"grad_norm": 36.75,
|
32118 |
+
"learning_rate": 9.86728628728387e-06,
|
32119 |
+
"loss": 19.2827,
|
32120 |
+
"step": 45770
|
32121 |
+
},
|
32122 |
+
{
|
32123 |
+
"epoch": 0.8495535807486482,
|
32124 |
+
"grad_norm": 35.53125,
|
32125 |
+
"learning_rate": 9.867257291497827e-06,
|
32126 |
+
"loss": 18.5532,
|
32127 |
+
"step": 45780
|
32128 |
+
},
|
32129 |
+
{
|
32130 |
+
"epoch": 0.849739153833128,
|
32131 |
+
"grad_norm": 37.34375,
|
32132 |
+
"learning_rate": 9.867228295711785e-06,
|
32133 |
+
"loss": 19.2479,
|
32134 |
+
"step": 45790
|
32135 |
+
},
|
32136 |
+
{
|
32137 |
+
"epoch": 0.8499247269176079,
|
32138 |
+
"grad_norm": 37.6875,
|
32139 |
+
"learning_rate": 9.867199299925742e-06,
|
32140 |
+
"loss": 19.3353,
|
32141 |
+
"step": 45800
|
32142 |
+
},
|
32143 |
+
{
|
32144 |
+
"epoch": 0.8501103000020876,
|
32145 |
+
"grad_norm": 37.03125,
|
32146 |
+
"learning_rate": 9.867170304139701e-06,
|
32147 |
+
"loss": 19.4561,
|
32148 |
+
"step": 45810
|
32149 |
+
},
|
32150 |
+
{
|
32151 |
+
"epoch": 0.8502958730865675,
|
32152 |
+
"grad_norm": 36.5,
|
32153 |
+
"learning_rate": 9.867141308353659e-06,
|
32154 |
+
"loss": 19.2609,
|
32155 |
+
"step": 45820
|
32156 |
+
},
|
32157 |
+
{
|
32158 |
+
"epoch": 0.8504814461710474,
|
32159 |
+
"grad_norm": 35.59375,
|
32160 |
+
"learning_rate": 9.867112312567614e-06,
|
32161 |
+
"loss": 18.6296,
|
32162 |
+
"step": 45830
|
32163 |
+
},
|
32164 |
+
{
|
32165 |
+
"epoch": 0.8506670192555271,
|
32166 |
+
"grad_norm": 36.25,
|
32167 |
+
"learning_rate": 9.867083316781573e-06,
|
32168 |
+
"loss": 18.561,
|
32169 |
+
"step": 45840
|
32170 |
+
},
|
32171 |
+
{
|
32172 |
+
"epoch": 0.850852592340007,
|
32173 |
+
"grad_norm": 37.0,
|
32174 |
+
"learning_rate": 9.86705432099553e-06,
|
32175 |
+
"loss": 19.1894,
|
32176 |
+
"step": 45850
|
32177 |
+
},
|
32178 |
+
{
|
32179 |
+
"epoch": 0.8510381654244868,
|
32180 |
+
"grad_norm": 39.0625,
|
32181 |
+
"learning_rate": 9.867025325209488e-06,
|
32182 |
+
"loss": 19.0677,
|
32183 |
+
"step": 45860
|
32184 |
+
},
|
32185 |
+
{
|
32186 |
+
"epoch": 0.8512237385089667,
|
32187 |
+
"grad_norm": 37.15625,
|
32188 |
+
"learning_rate": 9.866996329423446e-06,
|
32189 |
+
"loss": 19.3011,
|
32190 |
+
"step": 45870
|
32191 |
+
},
|
32192 |
+
{
|
32193 |
+
"epoch": 0.8514093115934465,
|
32194 |
+
"grad_norm": 38.15625,
|
32195 |
+
"learning_rate": 9.866967333637403e-06,
|
32196 |
+
"loss": 18.3706,
|
32197 |
+
"step": 45880
|
32198 |
+
},
|
32199 |
+
{
|
32200 |
+
"epoch": 0.8515948846779263,
|
32201 |
+
"grad_norm": 36.5625,
|
32202 |
+
"learning_rate": 9.86693833785136e-06,
|
32203 |
+
"loss": 18.8431,
|
32204 |
+
"step": 45890
|
32205 |
+
},
|
32206 |
+
{
|
32207 |
+
"epoch": 0.8517804577624062,
|
32208 |
+
"grad_norm": 37.78125,
|
32209 |
+
"learning_rate": 9.866909342065318e-06,
|
32210 |
+
"loss": 18.7596,
|
32211 |
+
"step": 45900
|
32212 |
+
},
|
32213 |
+
{
|
32214 |
+
"epoch": 0.8519660308468859,
|
32215 |
+
"grad_norm": 37.40625,
|
32216 |
+
"learning_rate": 9.866880346279277e-06,
|
32217 |
+
"loss": 19.2939,
|
32218 |
+
"step": 45910
|
32219 |
+
},
|
32220 |
+
{
|
32221 |
+
"epoch": 0.8521516039313658,
|
32222 |
+
"grad_norm": 36.96875,
|
32223 |
+
"learning_rate": 9.866851350493234e-06,
|
32224 |
+
"loss": 18.9418,
|
32225 |
+
"step": 45920
|
32226 |
+
},
|
32227 |
+
{
|
32228 |
+
"epoch": 0.8523371770158457,
|
32229 |
+
"grad_norm": 34.34375,
|
32230 |
+
"learning_rate": 9.866822354707192e-06,
|
32231 |
+
"loss": 18.5307,
|
32232 |
+
"step": 45930
|
32233 |
+
},
|
32234 |
+
{
|
32235 |
+
"epoch": 0.8525227501003254,
|
32236 |
+
"grad_norm": 39.0625,
|
32237 |
+
"learning_rate": 9.866793358921149e-06,
|
32238 |
+
"loss": 19.5498,
|
32239 |
+
"step": 45940
|
32240 |
+
},
|
32241 |
+
{
|
32242 |
+
"epoch": 0.8527083231848053,
|
32243 |
+
"grad_norm": 35.28125,
|
32244 |
+
"learning_rate": 9.866764363135107e-06,
|
32245 |
+
"loss": 18.329,
|
32246 |
+
"step": 45950
|
32247 |
+
},
|
32248 |
+
{
|
32249 |
+
"epoch": 0.8528938962692851,
|
32250 |
+
"grad_norm": 37.09375,
|
32251 |
+
"learning_rate": 9.866735367349064e-06,
|
32252 |
+
"loss": 18.971,
|
32253 |
+
"step": 45960
|
32254 |
+
},
|
32255 |
+
{
|
32256 |
+
"epoch": 0.8530794693537649,
|
32257 |
+
"grad_norm": 36.03125,
|
32258 |
+
"learning_rate": 9.866706371563021e-06,
|
32259 |
+
"loss": 18.7174,
|
32260 |
+
"step": 45970
|
32261 |
+
},
|
32262 |
+
{
|
32263 |
+
"epoch": 0.8532650424382447,
|
32264 |
+
"grad_norm": 35.25,
|
32265 |
+
"learning_rate": 9.866677375776979e-06,
|
32266 |
+
"loss": 18.7271,
|
32267 |
+
"step": 45980
|
32268 |
+
},
|
32269 |
+
{
|
32270 |
+
"epoch": 0.8534506155227246,
|
32271 |
+
"grad_norm": 33.03125,
|
32272 |
+
"learning_rate": 9.866648379990936e-06,
|
32273 |
+
"loss": 19.0984,
|
32274 |
+
"step": 45990
|
32275 |
+
},
|
32276 |
+
{
|
32277 |
+
"epoch": 0.8536361886072044,
|
32278 |
+
"grad_norm": 37.09375,
|
32279 |
+
"learning_rate": 9.866619384204894e-06,
|
32280 |
+
"loss": 19.3761,
|
32281 |
+
"step": 46000
|
32282 |
+
},
|
32283 |
+
{
|
32284 |
+
"epoch": 0.8538217616916842,
|
32285 |
+
"grad_norm": 38.1875,
|
32286 |
+
"learning_rate": 9.866590388418853e-06,
|
32287 |
+
"loss": 18.9924,
|
32288 |
+
"step": 46010
|
32289 |
+
},
|
32290 |
+
{
|
32291 |
+
"epoch": 0.8540073347761641,
|
32292 |
+
"grad_norm": 35.90625,
|
32293 |
+
"learning_rate": 9.86656139263281e-06,
|
32294 |
+
"loss": 18.9491,
|
32295 |
+
"step": 46020
|
32296 |
+
},
|
32297 |
+
{
|
32298 |
+
"epoch": 0.854192907860644,
|
32299 |
+
"grad_norm": 34.875,
|
32300 |
+
"learning_rate": 9.866532396846767e-06,
|
32301 |
+
"loss": 19.4705,
|
32302 |
+
"step": 46030
|
32303 |
+
},
|
32304 |
+
{
|
32305 |
+
"epoch": 0.8543784809451237,
|
32306 |
+
"grad_norm": 36.09375,
|
32307 |
+
"learning_rate": 9.866503401060725e-06,
|
32308 |
+
"loss": 18.1977,
|
32309 |
+
"step": 46040
|
32310 |
+
},
|
32311 |
+
{
|
32312 |
+
"epoch": 0.8545640540296036,
|
32313 |
+
"grad_norm": 36.25,
|
32314 |
+
"learning_rate": 9.866474405274682e-06,
|
32315 |
+
"loss": 18.6364,
|
32316 |
+
"step": 46050
|
32317 |
+
},
|
32318 |
+
{
|
32319 |
+
"epoch": 0.8547496271140834,
|
32320 |
+
"grad_norm": 34.6875,
|
32321 |
+
"learning_rate": 9.86644540948864e-06,
|
32322 |
+
"loss": 18.6337,
|
32323 |
+
"step": 46060
|
32324 |
+
},
|
32325 |
+
{
|
32326 |
+
"epoch": 0.8549352001985632,
|
32327 |
+
"grad_norm": 36.9375,
|
32328 |
+
"learning_rate": 9.866416413702597e-06,
|
32329 |
+
"loss": 19.3606,
|
32330 |
+
"step": 46070
|
32331 |
+
},
|
32332 |
+
{
|
32333 |
+
"epoch": 0.855120773283043,
|
32334 |
+
"grad_norm": 36.59375,
|
32335 |
+
"learning_rate": 9.866387417916555e-06,
|
32336 |
+
"loss": 19.6339,
|
32337 |
+
"step": 46080
|
32338 |
+
},
|
32339 |
+
{
|
32340 |
+
"epoch": 0.8553063463675229,
|
32341 |
+
"grad_norm": 36.6875,
|
32342 |
+
"learning_rate": 9.866358422130514e-06,
|
32343 |
+
"loss": 19.3116,
|
32344 |
+
"step": 46090
|
32345 |
+
},
|
32346 |
+
{
|
32347 |
+
"epoch": 0.8554919194520026,
|
32348 |
+
"grad_norm": 37.21875,
|
32349 |
+
"learning_rate": 9.86632942634447e-06,
|
32350 |
+
"loss": 19.3043,
|
32351 |
+
"step": 46100
|
32352 |
+
},
|
32353 |
+
{
|
32354 |
+
"epoch": 0.8556774925364825,
|
32355 |
+
"grad_norm": 36.1875,
|
32356 |
+
"learning_rate": 9.866300430558427e-06,
|
32357 |
+
"loss": 18.8968,
|
32358 |
+
"step": 46110
|
32359 |
+
},
|
32360 |
+
{
|
32361 |
+
"epoch": 0.8558630656209624,
|
32362 |
+
"grad_norm": 36.9375,
|
32363 |
+
"learning_rate": 9.866271434772386e-06,
|
32364 |
+
"loss": 19.0006,
|
32365 |
+
"step": 46120
|
32366 |
+
},
|
32367 |
+
{
|
32368 |
+
"epoch": 0.8560486387054421,
|
32369 |
+
"grad_norm": 35.8125,
|
32370 |
+
"learning_rate": 9.866242438986343e-06,
|
32371 |
+
"loss": 19.0822,
|
32372 |
+
"step": 46130
|
32373 |
+
},
|
32374 |
+
{
|
32375 |
+
"epoch": 0.856234211789922,
|
32376 |
+
"grad_norm": 38.0,
|
32377 |
+
"learning_rate": 9.8662134432003e-06,
|
32378 |
+
"loss": 18.9034,
|
32379 |
+
"step": 46140
|
32380 |
+
},
|
32381 |
+
{
|
32382 |
+
"epoch": 0.8564197848744018,
|
32383 |
+
"grad_norm": 35.46875,
|
32384 |
+
"learning_rate": 9.866184447414258e-06,
|
32385 |
+
"loss": 18.5902,
|
32386 |
+
"step": 46150
|
32387 |
+
},
|
32388 |
+
{
|
32389 |
+
"epoch": 0.8566053579588816,
|
32390 |
+
"grad_norm": 35.84375,
|
32391 |
+
"learning_rate": 9.866155451628215e-06,
|
32392 |
+
"loss": 18.9346,
|
32393 |
+
"step": 46160
|
32394 |
+
},
|
32395 |
+
{
|
32396 |
+
"epoch": 0.8567909310433615,
|
32397 |
+
"grad_norm": 37.09375,
|
32398 |
+
"learning_rate": 9.866126455842173e-06,
|
32399 |
+
"loss": 19.1755,
|
32400 |
+
"step": 46170
|
32401 |
+
},
|
32402 |
+
{
|
32403 |
+
"epoch": 0.8569765041278413,
|
32404 |
+
"grad_norm": 37.46875,
|
32405 |
+
"learning_rate": 9.86609746005613e-06,
|
32406 |
+
"loss": 19.1133,
|
32407 |
+
"step": 46180
|
32408 |
+
},
|
32409 |
+
{
|
32410 |
+
"epoch": 0.8571620772123211,
|
32411 |
+
"grad_norm": 35.875,
|
32412 |
+
"learning_rate": 9.86606846427009e-06,
|
32413 |
+
"loss": 18.9241,
|
32414 |
+
"step": 46190
|
32415 |
+
},
|
32416 |
+
{
|
32417 |
+
"epoch": 0.8573476502968009,
|
32418 |
+
"grad_norm": 35.46875,
|
32419 |
+
"learning_rate": 9.866039468484045e-06,
|
32420 |
+
"loss": 19.4059,
|
32421 |
+
"step": 46200
|
32422 |
+
},
|
32423 |
+
{
|
32424 |
+
"epoch": 0.8575332233812808,
|
32425 |
+
"grad_norm": 33.375,
|
32426 |
+
"learning_rate": 9.866010472698003e-06,
|
32427 |
+
"loss": 19.2368,
|
32428 |
+
"step": 46210
|
32429 |
+
},
|
32430 |
+
{
|
32431 |
+
"epoch": 0.8577187964657607,
|
32432 |
+
"grad_norm": 35.875,
|
32433 |
+
"learning_rate": 9.865981476911962e-06,
|
32434 |
+
"loss": 18.9356,
|
32435 |
+
"step": 46220
|
32436 |
+
},
|
32437 |
+
{
|
32438 |
+
"epoch": 0.8579043695502404,
|
32439 |
+
"grad_norm": 36.6875,
|
32440 |
+
"learning_rate": 9.865952481125919e-06,
|
32441 |
+
"loss": 19.2764,
|
32442 |
+
"step": 46230
|
32443 |
+
},
|
32444 |
+
{
|
32445 |
+
"epoch": 0.8580899426347203,
|
32446 |
+
"grad_norm": 36.8125,
|
32447 |
+
"learning_rate": 9.865923485339876e-06,
|
32448 |
+
"loss": 19.1283,
|
32449 |
+
"step": 46240
|
32450 |
+
},
|
32451 |
+
{
|
32452 |
+
"epoch": 0.8582755157192001,
|
32453 |
+
"grad_norm": 36.4375,
|
32454 |
+
"learning_rate": 9.865894489553834e-06,
|
32455 |
+
"loss": 18.773,
|
32456 |
+
"step": 46250
|
32457 |
+
},
|
32458 |
+
{
|
32459 |
+
"epoch": 0.8584610888036799,
|
32460 |
+
"grad_norm": 34.15625,
|
32461 |
+
"learning_rate": 9.865865493767791e-06,
|
32462 |
+
"loss": 19.0013,
|
32463 |
+
"step": 46260
|
32464 |
+
},
|
32465 |
+
{
|
32466 |
+
"epoch": 0.8586466618881597,
|
32467 |
+
"grad_norm": 35.5,
|
32468 |
+
"learning_rate": 9.865836497981749e-06,
|
32469 |
+
"loss": 19.106,
|
32470 |
+
"step": 46270
|
32471 |
+
},
|
32472 |
+
{
|
32473 |
+
"epoch": 0.8588322349726396,
|
32474 |
+
"grad_norm": 36.46875,
|
32475 |
+
"learning_rate": 9.865807502195706e-06,
|
32476 |
+
"loss": 19.1127,
|
32477 |
+
"step": 46280
|
32478 |
+
},
|
32479 |
+
{
|
32480 |
+
"epoch": 0.8590178080571194,
|
32481 |
+
"grad_norm": 36.03125,
|
32482 |
+
"learning_rate": 9.865778506409665e-06,
|
32483 |
+
"loss": 19.1271,
|
32484 |
+
"step": 46290
|
32485 |
+
},
|
32486 |
+
{
|
32487 |
+
"epoch": 0.8592033811415992,
|
32488 |
+
"grad_norm": 36.25,
|
32489 |
+
"learning_rate": 9.865749510623623e-06,
|
32490 |
+
"loss": 18.7041,
|
32491 |
+
"step": 46300
|
32492 |
+
},
|
32493 |
+
{
|
32494 |
+
"epoch": 0.8593889542260791,
|
32495 |
+
"grad_norm": 35.96875,
|
32496 |
+
"learning_rate": 9.865720514837578e-06,
|
32497 |
+
"loss": 19.1869,
|
32498 |
+
"step": 46310
|
32499 |
+
},
|
32500 |
+
{
|
32501 |
+
"epoch": 0.8595745273105588,
|
32502 |
+
"grad_norm": 37.34375,
|
32503 |
+
"learning_rate": 9.865691519051537e-06,
|
32504 |
+
"loss": 19.1163,
|
32505 |
+
"step": 46320
|
32506 |
+
},
|
32507 |
+
{
|
32508 |
+
"epoch": 0.8597601003950387,
|
32509 |
+
"grad_norm": 38.09375,
|
32510 |
+
"learning_rate": 9.865662523265495e-06,
|
32511 |
+
"loss": 18.9266,
|
32512 |
+
"step": 46330
|
32513 |
+
},
|
32514 |
+
{
|
32515 |
+
"epoch": 0.8599456734795186,
|
32516 |
+
"grad_norm": 36.0,
|
32517 |
+
"learning_rate": 9.865633527479452e-06,
|
32518 |
+
"loss": 19.044,
|
32519 |
+
"step": 46340
|
32520 |
+
},
|
32521 |
+
{
|
32522 |
+
"epoch": 0.8601312465639983,
|
32523 |
+
"grad_norm": 36.96875,
|
32524 |
+
"learning_rate": 9.86560453169341e-06,
|
32525 |
+
"loss": 19.1646,
|
32526 |
+
"step": 46350
|
32527 |
+
},
|
32528 |
+
{
|
32529 |
+
"epoch": 0.8603168196484782,
|
32530 |
+
"grad_norm": 38.4375,
|
32531 |
+
"learning_rate": 9.865575535907369e-06,
|
32532 |
+
"loss": 18.5803,
|
32533 |
+
"step": 46360
|
32534 |
+
},
|
32535 |
+
{
|
32536 |
+
"epoch": 0.860502392732958,
|
32537 |
+
"grad_norm": 36.25,
|
32538 |
+
"learning_rate": 9.865546540121324e-06,
|
32539 |
+
"loss": 19.0525,
|
32540 |
+
"step": 46370
|
32541 |
+
},
|
32542 |
+
{
|
32543 |
+
"epoch": 0.8606879658174378,
|
32544 |
+
"grad_norm": 36.78125,
|
32545 |
+
"learning_rate": 9.865517544335282e-06,
|
32546 |
+
"loss": 18.6633,
|
32547 |
+
"step": 46380
|
32548 |
+
},
|
32549 |
+
{
|
32550 |
+
"epoch": 0.8608735389019176,
|
32551 |
+
"grad_norm": 35.59375,
|
32552 |
+
"learning_rate": 9.865488548549241e-06,
|
32553 |
+
"loss": 19.2111,
|
32554 |
+
"step": 46390
|
32555 |
+
},
|
32556 |
+
{
|
32557 |
+
"epoch": 0.8610591119863975,
|
32558 |
+
"grad_norm": 37.34375,
|
32559 |
+
"learning_rate": 9.865459552763198e-06,
|
32560 |
+
"loss": 19.2641,
|
32561 |
+
"step": 46400
|
32562 |
+
},
|
32563 |
+
{
|
32564 |
+
"epoch": 0.8612446850708774,
|
32565 |
+
"grad_norm": 35.1875,
|
32566 |
+
"learning_rate": 9.865430556977156e-06,
|
32567 |
+
"loss": 19.0881,
|
32568 |
+
"step": 46410
|
32569 |
+
},
|
32570 |
+
{
|
32571 |
+
"epoch": 0.8614302581553571,
|
32572 |
+
"grad_norm": 38.5625,
|
32573 |
+
"learning_rate": 9.865401561191113e-06,
|
32574 |
+
"loss": 19.2938,
|
32575 |
+
"step": 46420
|
32576 |
+
},
|
32577 |
+
{
|
32578 |
+
"epoch": 0.861615831239837,
|
32579 |
+
"grad_norm": 36.21875,
|
32580 |
+
"learning_rate": 9.86537256540507e-06,
|
32581 |
+
"loss": 18.9959,
|
32582 |
+
"step": 46430
|
32583 |
+
},
|
32584 |
+
{
|
32585 |
+
"epoch": 0.8618014043243168,
|
32586 |
+
"grad_norm": 36.40625,
|
32587 |
+
"learning_rate": 9.865343569619028e-06,
|
32588 |
+
"loss": 19.1233,
|
32589 |
+
"step": 46440
|
32590 |
+
},
|
32591 |
+
{
|
32592 |
+
"epoch": 0.8619869774087966,
|
32593 |
+
"grad_norm": 39.0625,
|
32594 |
+
"learning_rate": 9.865314573832985e-06,
|
32595 |
+
"loss": 18.7088,
|
32596 |
+
"step": 46450
|
32597 |
+
},
|
32598 |
+
{
|
32599 |
+
"epoch": 0.8621725504932765,
|
32600 |
+
"grad_norm": 35.25,
|
32601 |
+
"learning_rate": 9.865285578046944e-06,
|
32602 |
+
"loss": 18.7742,
|
32603 |
+
"step": 46460
|
32604 |
+
},
|
32605 |
+
{
|
32606 |
+
"epoch": 0.8623581235777563,
|
32607 |
+
"grad_norm": 37.875,
|
32608 |
+
"learning_rate": 9.8652565822609e-06,
|
32609 |
+
"loss": 19.0445,
|
32610 |
+
"step": 46470
|
32611 |
+
},
|
32612 |
+
{
|
32613 |
+
"epoch": 0.8625436966622361,
|
32614 |
+
"grad_norm": 36.125,
|
32615 |
+
"learning_rate": 9.865227586474858e-06,
|
32616 |
+
"loss": 18.9246,
|
32617 |
+
"step": 46480
|
32618 |
+
},
|
32619 |
+
{
|
32620 |
+
"epoch": 0.8627292697467159,
|
32621 |
+
"grad_norm": 35.8125,
|
32622 |
+
"learning_rate": 9.865198590688817e-06,
|
32623 |
+
"loss": 19.4552,
|
32624 |
+
"step": 46490
|
32625 |
+
},
|
32626 |
+
{
|
32627 |
+
"epoch": 0.8629148428311958,
|
32628 |
+
"grad_norm": 37.59375,
|
32629 |
+
"learning_rate": 9.865169594902774e-06,
|
32630 |
+
"loss": 19.0426,
|
32631 |
+
"step": 46500
|
32632 |
+
},
|
32633 |
+
{
|
32634 |
+
"epoch": 0.8631004159156755,
|
32635 |
+
"grad_norm": 35.0,
|
32636 |
+
"learning_rate": 9.865140599116732e-06,
|
32637 |
+
"loss": 18.9247,
|
32638 |
+
"step": 46510
|
32639 |
+
},
|
32640 |
+
{
|
32641 |
+
"epoch": 0.8632859890001554,
|
32642 |
+
"grad_norm": 36.28125,
|
32643 |
+
"learning_rate": 9.865111603330689e-06,
|
32644 |
+
"loss": 19.2894,
|
32645 |
+
"step": 46520
|
32646 |
+
},
|
32647 |
+
{
|
32648 |
+
"epoch": 0.8634715620846353,
|
32649 |
+
"grad_norm": 36.78125,
|
32650 |
+
"learning_rate": 9.865082607544646e-06,
|
32651 |
+
"loss": 18.7233,
|
32652 |
+
"step": 46530
|
32653 |
+
},
|
32654 |
+
{
|
32655 |
+
"epoch": 0.863657135169115,
|
32656 |
+
"grad_norm": 35.25,
|
32657 |
+
"learning_rate": 9.865053611758604e-06,
|
32658 |
+
"loss": 19.2367,
|
32659 |
+
"step": 46540
|
32660 |
+
},
|
32661 |
+
{
|
32662 |
+
"epoch": 0.8638427082535949,
|
32663 |
+
"grad_norm": 38.46875,
|
32664 |
+
"learning_rate": 9.865024615972561e-06,
|
32665 |
+
"loss": 19.3671,
|
32666 |
+
"step": 46550
|
32667 |
+
},
|
32668 |
+
{
|
32669 |
+
"epoch": 0.8640282813380747,
|
32670 |
+
"grad_norm": 33.9375,
|
32671 |
+
"learning_rate": 9.864995620186519e-06,
|
32672 |
+
"loss": 19.4396,
|
32673 |
+
"step": 46560
|
32674 |
+
},
|
32675 |
+
{
|
32676 |
+
"epoch": 0.8642138544225546,
|
32677 |
+
"grad_norm": 37.15625,
|
32678 |
+
"learning_rate": 9.864966624400478e-06,
|
32679 |
+
"loss": 18.6888,
|
32680 |
+
"step": 46570
|
32681 |
+
},
|
32682 |
+
{
|
32683 |
+
"epoch": 0.8643994275070344,
|
32684 |
+
"grad_norm": 35.40625,
|
32685 |
+
"learning_rate": 9.864937628614433e-06,
|
32686 |
+
"loss": 19.0491,
|
32687 |
+
"step": 46580
|
32688 |
+
},
|
32689 |
+
{
|
32690 |
+
"epoch": 0.8645850005915142,
|
32691 |
+
"grad_norm": 38.65625,
|
32692 |
+
"learning_rate": 9.864908632828392e-06,
|
32693 |
+
"loss": 19.1106,
|
32694 |
+
"step": 46590
|
32695 |
+
},
|
32696 |
+
{
|
32697 |
+
"epoch": 0.8647705736759941,
|
32698 |
+
"grad_norm": 34.84375,
|
32699 |
+
"learning_rate": 9.86487963704235e-06,
|
32700 |
+
"loss": 19.0092,
|
32701 |
+
"step": 46600
|
32702 |
+
},
|
32703 |
+
{
|
32704 |
+
"epoch": 0.8649561467604738,
|
32705 |
+
"grad_norm": 35.625,
|
32706 |
+
"learning_rate": 9.864850641256307e-06,
|
32707 |
+
"loss": 19.4733,
|
32708 |
+
"step": 46610
|
32709 |
+
},
|
32710 |
+
{
|
32711 |
+
"epoch": 0.8651417198449537,
|
32712 |
+
"grad_norm": 38.1875,
|
32713 |
+
"learning_rate": 9.864821645470265e-06,
|
32714 |
+
"loss": 18.892,
|
32715 |
+
"step": 46620
|
32716 |
+
},
|
32717 |
+
{
|
32718 |
+
"epoch": 0.8653272929294336,
|
32719 |
+
"grad_norm": 35.6875,
|
32720 |
+
"learning_rate": 9.864792649684222e-06,
|
32721 |
+
"loss": 18.8302,
|
32722 |
+
"step": 46630
|
32723 |
+
},
|
32724 |
+
{
|
32725 |
+
"epoch": 0.8655128660139133,
|
32726 |
+
"grad_norm": 35.09375,
|
32727 |
+
"learning_rate": 9.86476365389818e-06,
|
32728 |
+
"loss": 19.2244,
|
32729 |
+
"step": 46640
|
32730 |
+
},
|
32731 |
+
{
|
32732 |
+
"epoch": 0.8656984390983932,
|
32733 |
+
"grad_norm": 35.40625,
|
32734 |
+
"learning_rate": 9.864734658112137e-06,
|
32735 |
+
"loss": 18.9491,
|
32736 |
+
"step": 46650
|
32737 |
+
},
|
[... 300 further "log_history" entries appended in the same format, one every 10 training steps from step 46660 through step 49650: "epoch" rises from about 0.8659 to 0.9214, "grad_norm" fluctuates between roughly 32.5 and 39.5, "learning_rate" decays from about 9.8647e-06 to 9.8560e-06, and "loss" varies between roughly 18.25 and 19.55 ...]
34838 |
+
{
|
34839 |
+
"epoch": 0.9215559375268211,
|
34840 |
+
"grad_norm": 37.5,
|
34841 |
+
"learning_rate": 9.85600692651337e-06,
|
34842 |
+
"loss": 18.8726,
|
34843 |
+
"step": 49660
|
34844 |
+
},
|
34845 |
+
{
|
34846 |
+
"epoch": 0.9217415106113009,
|
34847 |
+
"grad_norm": 36.375,
|
34848 |
+
"learning_rate": 9.855977930727328e-06,
|
34849 |
+
"loss": 18.5765,
|
34850 |
+
"step": 49670
|
34851 |
+
},
|
34852 |
+
{
|
34853 |
+
"epoch": 0.9219270836957808,
|
34854 |
+
"grad_norm": 36.21875,
|
34855 |
+
"learning_rate": 9.855948934941285e-06,
|
34856 |
+
"loss": 18.8799,
|
34857 |
+
"step": 49680
|
34858 |
+
},
|
34859 |
+
{
|
34860 |
+
"epoch": 0.9221126567802606,
|
34861 |
+
"grad_norm": 35.84375,
|
34862 |
+
"learning_rate": 9.855919939155243e-06,
|
34863 |
+
"loss": 18.758,
|
34864 |
+
"step": 49690
|
34865 |
+
},
|
34866 |
+
{
|
34867 |
+
"epoch": 0.9222982298647404,
|
34868 |
+
"grad_norm": 36.96875,
|
34869 |
+
"learning_rate": 9.8558909433692e-06,
|
34870 |
+
"loss": 18.7222,
|
34871 |
+
"step": 49700
|
34872 |
+
},
|
34873 |
+
{
|
34874 |
+
"epoch": 0.9224838029492203,
|
34875 |
+
"grad_norm": 35.78125,
|
34876 |
+
"learning_rate": 9.855861947583158e-06,
|
34877 |
+
"loss": 18.7792,
|
34878 |
+
"step": 49710
|
34879 |
+
},
|
34880 |
+
{
|
34881 |
+
"epoch": 0.9226693760337,
|
34882 |
+
"grad_norm": 35.40625,
|
34883 |
+
"learning_rate": 9.855832951797117e-06,
|
34884 |
+
"loss": 18.7375,
|
34885 |
+
"step": 49720
|
34886 |
+
},
|
34887 |
+
{
|
34888 |
+
"epoch": 0.9228549491181799,
|
34889 |
+
"grad_norm": 36.9375,
|
34890 |
+
"learning_rate": 9.855803956011074e-06,
|
34891 |
+
"loss": 18.734,
|
34892 |
+
"step": 49730
|
34893 |
+
},
|
34894 |
+
{
|
34895 |
+
"epoch": 0.9230405222026598,
|
34896 |
+
"grad_norm": 37.46875,
|
34897 |
+
"learning_rate": 9.855774960225032e-06,
|
34898 |
+
"loss": 18.7155,
|
34899 |
+
"step": 49740
|
34900 |
+
},
|
34901 |
+
{
|
34902 |
+
"epoch": 0.9232260952871395,
|
34903 |
+
"grad_norm": 38.0,
|
34904 |
+
"learning_rate": 9.855745964438989e-06,
|
34905 |
+
"loss": 19.2284,
|
34906 |
+
"step": 49750
|
34907 |
+
},
|
34908 |
+
{
|
34909 |
+
"epoch": 0.9234116683716194,
|
34910 |
+
"grad_norm": 38.1875,
|
34911 |
+
"learning_rate": 9.855716968652946e-06,
|
34912 |
+
"loss": 19.4265,
|
34913 |
+
"step": 49760
|
34914 |
+
},
|
34915 |
+
{
|
34916 |
+
"epoch": 0.9235972414560992,
|
34917 |
+
"grad_norm": 36.46875,
|
34918 |
+
"learning_rate": 9.855687972866904e-06,
|
34919 |
+
"loss": 18.7159,
|
34920 |
+
"step": 49770
|
34921 |
+
},
|
34922 |
+
{
|
34923 |
+
"epoch": 0.9237828145405791,
|
34924 |
+
"grad_norm": 37.53125,
|
34925 |
+
"learning_rate": 9.855658977080861e-06,
|
34926 |
+
"loss": 18.4277,
|
34927 |
+
"step": 49780
|
34928 |
+
},
|
34929 |
+
{
|
34930 |
+
"epoch": 0.9239683876250588,
|
34931 |
+
"grad_norm": 37.15625,
|
34932 |
+
"learning_rate": 9.85562998129482e-06,
|
34933 |
+
"loss": 18.7183,
|
34934 |
+
"step": 49790
|
34935 |
+
},
|
34936 |
+
{
|
34937 |
+
"epoch": 0.9241539607095387,
|
34938 |
+
"grad_norm": 36.03125,
|
34939 |
+
"learning_rate": 9.855600985508776e-06,
|
34940 |
+
"loss": 18.8495,
|
34941 |
+
"step": 49800
|
34942 |
+
},
|
34943 |
+
{
|
34944 |
+
"epoch": 0.9243395337940186,
|
34945 |
+
"grad_norm": 36.0625,
|
34946 |
+
"learning_rate": 9.855571989722733e-06,
|
34947 |
+
"loss": 19.4189,
|
34948 |
+
"step": 49810
|
34949 |
+
},
|
34950 |
+
{
|
34951 |
+
"epoch": 0.9245251068784983,
|
34952 |
+
"grad_norm": 37.625,
|
34953 |
+
"learning_rate": 9.855542993936693e-06,
|
34954 |
+
"loss": 18.4683,
|
34955 |
+
"step": 49820
|
34956 |
+
},
|
34957 |
+
{
|
34958 |
+
"epoch": 0.9247106799629782,
|
34959 |
+
"grad_norm": 37.09375,
|
34960 |
+
"learning_rate": 9.85551399815065e-06,
|
34961 |
+
"loss": 19.0064,
|
34962 |
+
"step": 49830
|
34963 |
+
},
|
34964 |
+
{
|
34965 |
+
"epoch": 0.924896253047458,
|
34966 |
+
"grad_norm": 38.1875,
|
34967 |
+
"learning_rate": 9.855485002364607e-06,
|
34968 |
+
"loss": 18.644,
|
34969 |
+
"step": 49840
|
34970 |
+
},
|
34971 |
+
{
|
34972 |
+
"epoch": 0.9250818261319378,
|
34973 |
+
"grad_norm": 37.25,
|
34974 |
+
"learning_rate": 9.855456006578565e-06,
|
34975 |
+
"loss": 19.2591,
|
34976 |
+
"step": 49850
|
34977 |
+
},
|
34978 |
+
{
|
34979 |
+
"epoch": 0.9252673992164177,
|
34980 |
+
"grad_norm": 36.90625,
|
34981 |
+
"learning_rate": 9.855427010792522e-06,
|
34982 |
+
"loss": 18.8081,
|
34983 |
+
"step": 49860
|
34984 |
+
},
|
34985 |
+
{
|
34986 |
+
"epoch": 0.9254529723008975,
|
34987 |
+
"grad_norm": 36.625,
|
34988 |
+
"learning_rate": 9.85539801500648e-06,
|
34989 |
+
"loss": 19.0542,
|
34990 |
+
"step": 49870
|
34991 |
+
},
|
34992 |
+
{
|
34993 |
+
"epoch": 0.9256385453853773,
|
34994 |
+
"grad_norm": 35.8125,
|
34995 |
+
"learning_rate": 9.855369019220437e-06,
|
34996 |
+
"loss": 18.7709,
|
34997 |
+
"step": 49880
|
34998 |
+
},
|
34999 |
+
{
|
35000 |
+
"epoch": 0.9258241184698571,
|
35001 |
+
"grad_norm": 38.25,
|
35002 |
+
"learning_rate": 9.855340023434396e-06,
|
35003 |
+
"loss": 19.0913,
|
35004 |
+
"step": 49890
|
35005 |
+
},
|
35006 |
+
{
|
35007 |
+
"epoch": 0.926009691554337,
|
35008 |
+
"grad_norm": 35.8125,
|
35009 |
+
"learning_rate": 9.855311027648352e-06,
|
35010 |
+
"loss": 18.8659,
|
35011 |
+
"step": 49900
|
35012 |
+
},
|
35013 |
+
{
|
35014 |
+
"epoch": 0.9261952646388167,
|
35015 |
+
"grad_norm": 37.15625,
|
35016 |
+
"learning_rate": 9.85528203186231e-06,
|
35017 |
+
"loss": 18.3361,
|
35018 |
+
"step": 49910
|
35019 |
+
},
|
35020 |
+
{
|
35021 |
+
"epoch": 0.9263808377232966,
|
35022 |
+
"grad_norm": 34.78125,
|
35023 |
+
"learning_rate": 9.855253036076268e-06,
|
35024 |
+
"loss": 19.2276,
|
35025 |
+
"step": 49920
|
35026 |
+
},
|
35027 |
+
{
|
35028 |
+
"epoch": 0.9265664108077765,
|
35029 |
+
"grad_norm": 37.34375,
|
35030 |
+
"learning_rate": 9.855224040290226e-06,
|
35031 |
+
"loss": 18.7894,
|
35032 |
+
"step": 49930
|
35033 |
+
},
|
35034 |
+
{
|
35035 |
+
"epoch": 0.9267519838922562,
|
35036 |
+
"grad_norm": 35.96875,
|
35037 |
+
"learning_rate": 9.855195044504183e-06,
|
35038 |
+
"loss": 18.6204,
|
35039 |
+
"step": 49940
|
35040 |
+
},
|
35041 |
+
{
|
35042 |
+
"epoch": 0.9269375569767361,
|
35043 |
+
"grad_norm": 36.90625,
|
35044 |
+
"learning_rate": 9.85516604871814e-06,
|
35045 |
+
"loss": 18.4182,
|
35046 |
+
"step": 49950
|
35047 |
+
},
|
35048 |
+
{
|
35049 |
+
"epoch": 0.9271231300612159,
|
35050 |
+
"grad_norm": 35.53125,
|
35051 |
+
"learning_rate": 9.855137052932098e-06,
|
35052 |
+
"loss": 18.6139,
|
35053 |
+
"step": 49960
|
35054 |
+
},
|
35055 |
+
{
|
35056 |
+
"epoch": 0.9273087031456958,
|
35057 |
+
"grad_norm": 37.53125,
|
35058 |
+
"learning_rate": 9.855108057146055e-06,
|
35059 |
+
"loss": 18.7786,
|
35060 |
+
"step": 49970
|
35061 |
+
},
|
35062 |
+
{
|
35063 |
+
"epoch": 0.9274942762301756,
|
35064 |
+
"grad_norm": 37.375,
|
35065 |
+
"learning_rate": 9.855079061360013e-06,
|
35066 |
+
"loss": 18.8478,
|
35067 |
+
"step": 49980
|
35068 |
+
},
|
35069 |
+
{
|
35070 |
+
"epoch": 0.9276798493146554,
|
35071 |
+
"grad_norm": 34.125,
|
35072 |
+
"learning_rate": 9.855050065573972e-06,
|
35073 |
+
"loss": 18.4067,
|
35074 |
+
"step": 49990
|
35075 |
+
},
|
35076 |
+
{
|
35077 |
+
"epoch": 0.9278654223991353,
|
35078 |
+
"grad_norm": 36.40625,
|
35079 |
+
"learning_rate": 9.85502106978793e-06,
|
35080 |
+
"loss": 18.7262,
|
35081 |
+
"step": 50000
|
35082 |
+
},
|
35083 |
+
{
|
35084 |
+
"epoch": 0.9278654223991353,
|
35085 |
+
"eval_loss": 2.3473334312438965,
|
35086 |
+
"eval_runtime": 455.3845,
|
35087 |
+
"eval_samples_per_second": 3188.771,
|
35088 |
+
"eval_steps_per_second": 49.826,
|
35089 |
+
"step": 50000
35090       }
35091     ],
35092     "logging_steps": 10,
35106         "attributes": {}
35107       }
35108     },
35109 +   "total_flos": 8.72761379520512e+18,
35110     "train_batch_size": 8,
35111     "trial_name": null,
35112     "trial_params": null