Training in progress, step 6453, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 377528296
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9424ead24c1f9928d93dcca08f08662e27a0300efc062a3a829f95c3b8e226c6
|
3 |
size 377528296
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 755217530
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d27d2e7252f6cdeeeabe5085916f4a1a350decae7f63cd39ae04981d36e05dc3
|
3 |
size 755217530
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:20d715ec9ae5a99a51d0f413b64f52f539737bb65299888f322d1b46910817b7
|
3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0228103870c5c91f9e7c1c49686736ceb20668b7b5baf93d7127be66bdf65f06
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 500,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -43505,6 +43505,1679 @@
|
|
43505 |
"learning_rate": 9.980445299898722e-07,
|
43506 |
"loss": 1.305,
|
43507 |
"step": 6214
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
43508 |
}
|
43509 |
],
|
43510 |
"logging_steps": 1,
|
@@ -43524,7 +45197,7 @@
|
|
43524 |
"attributes": {}
|
43525 |
}
|
43526 |
},
|
43527 |
-
"total_flos":
|
43528 |
"train_batch_size": 4,
|
43529 |
"trial_name": null,
|
43530 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.9733031674208145,
|
5 |
"eval_steps": 500,
|
6 |
+
"global_step": 6453,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
43505 |
"learning_rate": 9.980445299898722e-07,
|
43506 |
"loss": 1.305,
|
43507 |
"step": 6214
|
43508 |
+
},
|
43509 |
+
{
|
43510 |
+
"epoch": 0.9374057315233786,
|
43511 |
+
"grad_norm": 1.9081813097000122,
|
43512 |
+
"learning_rate": 9.932679372475883e-07,
|
43513 |
+
"loss": 0.9995,
|
43514 |
+
"step": 6215
|
43515 |
+
},
|
43516 |
+
{
|
43517 |
+
"epoch": 0.9375565610859729,
|
43518 |
+
"grad_norm": 1.9167311191558838,
|
43519 |
+
"learning_rate": 9.885026875362746e-07,
|
43520 |
+
"loss": 0.9232,
|
43521 |
+
"step": 6216
|
43522 |
+
},
|
43523 |
+
{
|
43524 |
+
"epoch": 0.9377073906485671,
|
43525 |
+
"grad_norm": 1.6560578346252441,
|
43526 |
+
"learning_rate": 9.83748781958882e-07,
|
43527 |
+
"loss": 0.9508,
|
43528 |
+
"step": 6217
|
43529 |
+
},
|
43530 |
+
{
|
43531 |
+
"epoch": 0.9378582202111614,
|
43532 |
+
"grad_norm": 1.9292479753494263,
|
43533 |
+
"learning_rate": 9.790062216157469e-07,
|
43534 |
+
"loss": 1.1442,
|
43535 |
+
"step": 6218
|
43536 |
+
},
|
43537 |
+
{
|
43538 |
+
"epoch": 0.9380090497737557,
|
43539 |
+
"grad_norm": 2.204526901245117,
|
43540 |
+
"learning_rate": 9.742750076045749e-07,
|
43541 |
+
"loss": 1.1502,
|
43542 |
+
"step": 6219
|
43543 |
+
},
|
43544 |
+
{
|
43545 |
+
"epoch": 0.9381598793363499,
|
43546 |
+
"grad_norm": 2.1179401874542236,
|
43547 |
+
"learning_rate": 9.695551410204506e-07,
|
43548 |
+
"loss": 1.2448,
|
43549 |
+
"step": 6220
|
43550 |
+
},
|
43551 |
+
{
|
43552 |
+
"epoch": 0.9383107088989442,
|
43553 |
+
"grad_norm": 2.0917677879333496,
|
43554 |
+
"learning_rate": 9.648466229558174e-07,
|
43555 |
+
"loss": 1.2901,
|
43556 |
+
"step": 6221
|
43557 |
+
},
|
43558 |
+
{
|
43559 |
+
"epoch": 0.9384615384615385,
|
43560 |
+
"grad_norm": 1.9553565979003906,
|
43561 |
+
"learning_rate": 9.601494545005085e-07,
|
43562 |
+
"loss": 0.9956,
|
43563 |
+
"step": 6222
|
43564 |
+
},
|
43565 |
+
{
|
43566 |
+
"epoch": 0.9386123680241327,
|
43567 |
+
"grad_norm": 2.0436511039733887,
|
43568 |
+
"learning_rate": 9.554636367417269e-07,
|
43569 |
+
"loss": 0.9519,
|
43570 |
+
"step": 6223
|
43571 |
+
},
|
43572 |
+
{
|
43573 |
+
"epoch": 0.938763197586727,
|
43574 |
+
"grad_norm": 1.8288614749908447,
|
43575 |
+
"learning_rate": 9.507891707640437e-07,
|
43576 |
+
"loss": 1.0703,
|
43577 |
+
"step": 6224
|
43578 |
+
},
|
43579 |
+
{
|
43580 |
+
"epoch": 0.9389140271493213,
|
43581 |
+
"grad_norm": 1.8449335098266602,
|
43582 |
+
"learning_rate": 9.461260576494046e-07,
|
43583 |
+
"loss": 0.983,
|
43584 |
+
"step": 6225
|
43585 |
+
},
|
43586 |
+
{
|
43587 |
+
"epoch": 0.9390648567119155,
|
43588 |
+
"grad_norm": 1.7894610166549683,
|
43589 |
+
"learning_rate": 9.414742984771241e-07,
|
43590 |
+
"loss": 0.8819,
|
43591 |
+
"step": 6226
|
43592 |
+
},
|
43593 |
+
{
|
43594 |
+
"epoch": 0.9392156862745098,
|
43595 |
+
"grad_norm": 2.2926836013793945,
|
43596 |
+
"learning_rate": 9.368338943238964e-07,
|
43597 |
+
"loss": 1.1588,
|
43598 |
+
"step": 6227
|
43599 |
+
},
|
43600 |
+
{
|
43601 |
+
"epoch": 0.9393665158371041,
|
43602 |
+
"grad_norm": 2.1777687072753906,
|
43603 |
+
"learning_rate": 9.322048462637789e-07,
|
43604 |
+
"loss": 1.3449,
|
43605 |
+
"step": 6228
|
43606 |
+
},
|
43607 |
+
{
|
43608 |
+
"epoch": 0.9395173453996983,
|
43609 |
+
"grad_norm": 1.7373837232589722,
|
43610 |
+
"learning_rate": 9.275871553682092e-07,
|
43611 |
+
"loss": 0.938,
|
43612 |
+
"step": 6229
|
43613 |
+
},
|
43614 |
+
{
|
43615 |
+
"epoch": 0.9396681749622926,
|
43616 |
+
"grad_norm": 1.9276561737060547,
|
43617 |
+
"learning_rate": 9.229808227059878e-07,
|
43618 |
+
"loss": 0.9975,
|
43619 |
+
"step": 6230
|
43620 |
+
},
|
43621 |
+
{
|
43622 |
+
"epoch": 0.9398190045248869,
|
43623 |
+
"grad_norm": 1.7900891304016113,
|
43624 |
+
"learning_rate": 9.183858493432895e-07,
|
43625 |
+
"loss": 0.8548,
|
43626 |
+
"step": 6231
|
43627 |
+
},
|
43628 |
+
{
|
43629 |
+
"epoch": 0.9399698340874811,
|
43630 |
+
"grad_norm": 2.07910418510437,
|
43631 |
+
"learning_rate": 9.138022363436638e-07,
|
43632 |
+
"loss": 1.1805,
|
43633 |
+
"step": 6232
|
43634 |
+
},
|
43635 |
+
{
|
43636 |
+
"epoch": 0.9401206636500754,
|
43637 |
+
"grad_norm": 1.7643636465072632,
|
43638 |
+
"learning_rate": 9.09229984768023e-07,
|
43639 |
+
"loss": 0.991,
|
43640 |
+
"step": 6233
|
43641 |
+
},
|
43642 |
+
{
|
43643 |
+
"epoch": 0.9402714932126697,
|
43644 |
+
"grad_norm": 1.8613131046295166,
|
43645 |
+
"learning_rate": 9.046690956746595e-07,
|
43646 |
+
"loss": 1.0485,
|
43647 |
+
"step": 6234
|
43648 |
+
},
|
43649 |
+
{
|
43650 |
+
"epoch": 0.9404223227752639,
|
43651 |
+
"grad_norm": 2.2653021812438965,
|
43652 |
+
"learning_rate": 9.001195701192289e-07,
|
43653 |
+
"loss": 1.2899,
|
43654 |
+
"step": 6235
|
43655 |
+
},
|
43656 |
+
{
|
43657 |
+
"epoch": 0.9405731523378582,
|
43658 |
+
"grad_norm": 2.3277347087860107,
|
43659 |
+
"learning_rate": 8.955814091547609e-07,
|
43660 |
+
"loss": 1.4375,
|
43661 |
+
"step": 6236
|
43662 |
+
},
|
43663 |
+
{
|
43664 |
+
"epoch": 0.9407239819004525,
|
43665 |
+
"grad_norm": 2.1147515773773193,
|
43666 |
+
"learning_rate": 8.910546138316433e-07,
|
43667 |
+
"loss": 1.1043,
|
43668 |
+
"step": 6237
|
43669 |
+
},
|
43670 |
+
{
|
43671 |
+
"epoch": 0.9408748114630467,
|
43672 |
+
"grad_norm": 1.4688125848770142,
|
43673 |
+
"learning_rate": 8.865391851976491e-07,
|
43674 |
+
"loss": 0.6265,
|
43675 |
+
"step": 6238
|
43676 |
+
},
|
43677 |
+
{
|
43678 |
+
"epoch": 0.941025641025641,
|
43679 |
+
"grad_norm": 1.7932592630386353,
|
43680 |
+
"learning_rate": 8.820351242979141e-07,
|
43681 |
+
"loss": 0.8438,
|
43682 |
+
"step": 6239
|
43683 |
+
},
|
43684 |
+
{
|
43685 |
+
"epoch": 0.9411764705882353,
|
43686 |
+
"grad_norm": 2.096954107284546,
|
43687 |
+
"learning_rate": 8.775424321749382e-07,
|
43688 |
+
"loss": 1.0978,
|
43689 |
+
"step": 6240
|
43690 |
+
},
|
43691 |
+
{
|
43692 |
+
"epoch": 0.9413273001508295,
|
43693 |
+
"grad_norm": 2.0427722930908203,
|
43694 |
+
"learning_rate": 8.730611098685948e-07,
|
43695 |
+
"loss": 1.0328,
|
43696 |
+
"step": 6241
|
43697 |
+
},
|
43698 |
+
{
|
43699 |
+
"epoch": 0.9414781297134238,
|
43700 |
+
"grad_norm": 1.7261981964111328,
|
43701 |
+
"learning_rate": 8.685911584161266e-07,
|
43702 |
+
"loss": 0.7771,
|
43703 |
+
"step": 6242
|
43704 |
+
},
|
43705 |
+
{
|
43706 |
+
"epoch": 0.9416289592760181,
|
43707 |
+
"grad_norm": 2.103815793991089,
|
43708 |
+
"learning_rate": 8.641325788521393e-07,
|
43709 |
+
"loss": 0.998,
|
43710 |
+
"step": 6243
|
43711 |
+
},
|
43712 |
+
{
|
43713 |
+
"epoch": 0.9417797888386124,
|
43714 |
+
"grad_norm": 2.4655301570892334,
|
43715 |
+
"learning_rate": 8.596853722086074e-07,
|
43716 |
+
"loss": 1.237,
|
43717 |
+
"step": 6244
|
43718 |
+
},
|
43719 |
+
{
|
43720 |
+
"epoch": 0.9419306184012066,
|
43721 |
+
"grad_norm": 2.7287237644195557,
|
43722 |
+
"learning_rate": 8.552495395148852e-07,
|
43723 |
+
"loss": 1.4164,
|
43724 |
+
"step": 6245
|
43725 |
+
},
|
43726 |
+
{
|
43727 |
+
"epoch": 0.9420814479638009,
|
43728 |
+
"grad_norm": 1.7122939825057983,
|
43729 |
+
"learning_rate": 8.508250817976737e-07,
|
43730 |
+
"loss": 0.9301,
|
43731 |
+
"step": 6246
|
43732 |
+
},
|
43733 |
+
{
|
43734 |
+
"epoch": 0.9422322775263952,
|
43735 |
+
"grad_norm": 2.004281997680664,
|
43736 |
+
"learning_rate": 8.464120000810538e-07,
|
43737 |
+
"loss": 1.1161,
|
43738 |
+
"step": 6247
|
43739 |
+
},
|
43740 |
+
{
|
43741 |
+
"epoch": 0.9423831070889894,
|
43742 |
+
"grad_norm": 1.7664074897766113,
|
43743 |
+
"learning_rate": 8.420102953864806e-07,
|
43744 |
+
"loss": 0.8845,
|
43745 |
+
"step": 6248
|
43746 |
+
},
|
43747 |
+
{
|
43748 |
+
"epoch": 0.9425339366515837,
|
43749 |
+
"grad_norm": 1.9992055892944336,
|
43750 |
+
"learning_rate": 8.376199687327558e-07,
|
43751 |
+
"loss": 0.8977,
|
43752 |
+
"step": 6249
|
43753 |
+
},
|
43754 |
+
{
|
43755 |
+
"epoch": 0.942684766214178,
|
43756 |
+
"grad_norm": 1.8883174657821655,
|
43757 |
+
"learning_rate": 8.332410211360609e-07,
|
43758 |
+
"loss": 0.8384,
|
43759 |
+
"step": 6250
|
43760 |
+
},
|
43761 |
+
{
|
43762 |
+
"epoch": 0.9428355957767722,
|
43763 |
+
"grad_norm": 1.8478468656539917,
|
43764 |
+
"learning_rate": 8.288734536099408e-07,
|
43765 |
+
"loss": 1.2085,
|
43766 |
+
"step": 6251
|
43767 |
+
},
|
43768 |
+
{
|
43769 |
+
"epoch": 0.9429864253393665,
|
43770 |
+
"grad_norm": 1.9000264406204224,
|
43771 |
+
"learning_rate": 8.245172671653145e-07,
|
43772 |
+
"loss": 1.1676,
|
43773 |
+
"step": 6252
|
43774 |
+
},
|
43775 |
+
{
|
43776 |
+
"epoch": 0.9431372549019608,
|
43777 |
+
"grad_norm": 1.8996500968933105,
|
43778 |
+
"learning_rate": 8.201724628104535e-07,
|
43779 |
+
"loss": 1.1095,
|
43780 |
+
"step": 6253
|
43781 |
+
},
|
43782 |
+
{
|
43783 |
+
"epoch": 0.943288084464555,
|
43784 |
+
"grad_norm": 1.6948037147521973,
|
43785 |
+
"learning_rate": 8.15839041550992e-07,
|
43786 |
+
"loss": 0.9271,
|
43787 |
+
"step": 6254
|
43788 |
+
},
|
43789 |
+
{
|
43790 |
+
"epoch": 0.9434389140271493,
|
43791 |
+
"grad_norm": 1.878197193145752,
|
43792 |
+
"learning_rate": 8.115170043899501e-07,
|
43793 |
+
"loss": 0.9737,
|
43794 |
+
"step": 6255
|
43795 |
+
},
|
43796 |
+
{
|
43797 |
+
"epoch": 0.9435897435897436,
|
43798 |
+
"grad_norm": 2.2108347415924072,
|
43799 |
+
"learning_rate": 8.07206352327694e-07,
|
43800 |
+
"loss": 1.4355,
|
43801 |
+
"step": 6256
|
43802 |
+
},
|
43803 |
+
{
|
43804 |
+
"epoch": 0.9437405731523378,
|
43805 |
+
"grad_norm": 1.8024992942810059,
|
43806 |
+
"learning_rate": 8.029070863619648e-07,
|
43807 |
+
"loss": 1.1714,
|
43808 |
+
"step": 6257
|
43809 |
+
},
|
43810 |
+
{
|
43811 |
+
"epoch": 0.9438914027149321,
|
43812 |
+
"grad_norm": 1.9239073991775513,
|
43813 |
+
"learning_rate": 7.986192074878607e-07,
|
43814 |
+
"loss": 0.9482,
|
43815 |
+
"step": 6258
|
43816 |
+
},
|
43817 |
+
{
|
43818 |
+
"epoch": 0.9440422322775264,
|
43819 |
+
"grad_norm": 1.612963318824768,
|
43820 |
+
"learning_rate": 7.943427166978546e-07,
|
43821 |
+
"loss": 0.9701,
|
43822 |
+
"step": 6259
|
43823 |
+
},
|
43824 |
+
{
|
43825 |
+
"epoch": 0.9441930618401206,
|
43826 |
+
"grad_norm": 2.360172748565674,
|
43827 |
+
"learning_rate": 7.900776149817713e-07,
|
43828 |
+
"loss": 1.3821,
|
43829 |
+
"step": 6260
|
43830 |
+
},
|
43831 |
+
{
|
43832 |
+
"epoch": 0.9443438914027149,
|
43833 |
+
"grad_norm": 1.8003296852111816,
|
43834 |
+
"learning_rate": 7.8582390332681e-07,
|
43835 |
+
"loss": 0.9967,
|
43836 |
+
"step": 6261
|
43837 |
+
},
|
43838 |
+
{
|
43839 |
+
"epoch": 0.9444947209653092,
|
43840 |
+
"grad_norm": 1.9901286363601685,
|
43841 |
+
"learning_rate": 7.815815827175221e-07,
|
43842 |
+
"loss": 1.2425,
|
43843 |
+
"step": 6262
|
43844 |
+
},
|
43845 |
+
{
|
43846 |
+
"epoch": 0.9446455505279034,
|
43847 |
+
"grad_norm": 2.1474413871765137,
|
43848 |
+
"learning_rate": 7.773506541358333e-07,
|
43849 |
+
"loss": 1.2224,
|
43850 |
+
"step": 6263
|
43851 |
+
},
|
43852 |
+
{
|
43853 |
+
"epoch": 0.9447963800904977,
|
43854 |
+
"grad_norm": 1.9672069549560547,
|
43855 |
+
"learning_rate": 7.731311185610268e-07,
|
43856 |
+
"loss": 1.1663,
|
43857 |
+
"step": 6264
|
43858 |
+
},
|
43859 |
+
{
|
43860 |
+
"epoch": 0.944947209653092,
|
43861 |
+
"grad_norm": 1.8437694311141968,
|
43862 |
+
"learning_rate": 7.689229769697604e-07,
|
43863 |
+
"loss": 0.9525,
|
43864 |
+
"step": 6265
|
43865 |
+
},
|
43866 |
+
{
|
43867 |
+
"epoch": 0.9450980392156862,
|
43868 |
+
"grad_norm": 1.9961206912994385,
|
43869 |
+
"learning_rate": 7.64726230336027e-07,
|
43870 |
+
"loss": 1.0964,
|
43871 |
+
"step": 6266
|
43872 |
+
},
|
43873 |
+
{
|
43874 |
+
"epoch": 0.9452488687782805,
|
43875 |
+
"grad_norm": 1.8415944576263428,
|
43876 |
+
"learning_rate": 7.605408796312164e-07,
|
43877 |
+
"loss": 1.0519,
|
43878 |
+
"step": 6267
|
43879 |
+
},
|
43880 |
+
{
|
43881 |
+
"epoch": 0.9453996983408748,
|
43882 |
+
"grad_norm": 1.9274911880493164,
|
43883 |
+
"learning_rate": 7.563669258240535e-07,
|
43884 |
+
"loss": 1.1,
|
43885 |
+
"step": 6268
|
43886 |
+
},
|
43887 |
+
{
|
43888 |
+
"epoch": 0.945550527903469,
|
43889 |
+
"grad_norm": 1.9660152196884155,
|
43890 |
+
"learning_rate": 7.522043698806436e-07,
|
43891 |
+
"loss": 1.1443,
|
43892 |
+
"step": 6269
|
43893 |
+
},
|
43894 |
+
{
|
43895 |
+
"epoch": 0.9457013574660633,
|
43896 |
+
"grad_norm": 1.7958331108093262,
|
43897 |
+
"learning_rate": 7.480532127644435e-07,
|
43898 |
+
"loss": 0.9059,
|
43899 |
+
"step": 6270
|
43900 |
+
},
|
43901 |
+
{
|
43902 |
+
"epoch": 0.9458521870286576,
|
43903 |
+
"grad_norm": 2.053165912628174,
|
43904 |
+
"learning_rate": 7.439134554362681e-07,
|
43905 |
+
"loss": 1.2559,
|
43906 |
+
"step": 6271
|
43907 |
+
},
|
43908 |
+
{
|
43909 |
+
"epoch": 0.9460030165912519,
|
43910 |
+
"grad_norm": 1.9057128429412842,
|
43911 |
+
"learning_rate": 7.397850988543065e-07,
|
43912 |
+
"loss": 1.0629,
|
43913 |
+
"step": 6272
|
43914 |
+
},
|
43915 |
+
{
|
43916 |
+
"epoch": 0.9461538461538461,
|
43917 |
+
"grad_norm": 1.7405214309692383,
|
43918 |
+
"learning_rate": 7.356681439740998e-07,
|
43919 |
+
"loss": 0.958,
|
43920 |
+
"step": 6273
|
43921 |
+
},
|
43922 |
+
{
|
43923 |
+
"epoch": 0.9463046757164404,
|
43924 |
+
"grad_norm": 1.8470877408981323,
|
43925 |
+
"learning_rate": 7.315625917485525e-07,
|
43926 |
+
"loss": 0.9393,
|
43927 |
+
"step": 6274
|
43928 |
+
},
|
43929 |
+
{
|
43930 |
+
"epoch": 0.9464555052790347,
|
43931 |
+
"grad_norm": 1.862983226776123,
|
43932 |
+
"learning_rate": 7.274684431279378e-07,
|
43933 |
+
"loss": 0.8262,
|
43934 |
+
"step": 6275
|
43935 |
+
},
|
43936 |
+
{
|
43937 |
+
"epoch": 0.9466063348416289,
|
43938 |
+
"grad_norm": 2.055889844894409,
|
43939 |
+
"learning_rate": 7.233856990598697e-07,
|
43940 |
+
"loss": 1.1398,
|
43941 |
+
"step": 6276
|
43942 |
+
},
|
43943 |
+
{
|
43944 |
+
"epoch": 0.9467571644042232,
|
43945 |
+
"grad_norm": 1.97694730758667,
|
43946 |
+
"learning_rate": 7.193143604893426e-07,
|
43947 |
+
"loss": 1.0823,
|
43948 |
+
"step": 6277
|
43949 |
+
},
|
43950 |
+
{
|
43951 |
+
"epoch": 0.9469079939668175,
|
43952 |
+
"grad_norm": 1.692892074584961,
|
43953 |
+
"learning_rate": 7.152544283586971e-07,
|
43954 |
+
"loss": 0.7331,
|
43955 |
+
"step": 6278
|
43956 |
+
},
|
43957 |
+
{
|
43958 |
+
"epoch": 0.9470588235294117,
|
43959 |
+
"grad_norm": 1.8486021757125854,
|
43960 |
+
"learning_rate": 7.11205903607648e-07,
|
43961 |
+
"loss": 0.8909,
|
43962 |
+
"step": 6279
|
43963 |
+
},
|
43964 |
+
{
|
43965 |
+
"epoch": 0.947209653092006,
|
43966 |
+
"grad_norm": 2.050459146499634,
|
43967 |
+
"learning_rate": 7.071687871732513e-07,
|
43968 |
+
"loss": 1.0519,
|
43969 |
+
"step": 6280
|
43970 |
+
},
|
43971 |
+
{
|
43972 |
+
"epoch": 0.9473604826546003,
|
43973 |
+
"grad_norm": 2.027438163757324,
|
43974 |
+
"learning_rate": 7.03143079989943e-07,
|
43975 |
+
"loss": 1.1911,
|
43976 |
+
"step": 6281
|
43977 |
+
},
|
43978 |
+
{
|
43979 |
+
"epoch": 0.9475113122171945,
|
43980 |
+
"grad_norm": 2.2774291038513184,
|
43981 |
+
"learning_rate": 6.991287829894999e-07,
|
43982 |
+
"loss": 1.0686,
|
43983 |
+
"step": 6282
|
43984 |
+
},
|
43985 |
+
{
|
43986 |
+
"epoch": 0.9476621417797888,
|
43987 |
+
"grad_norm": 2.0271224975585938,
|
43988 |
+
"learning_rate": 6.951258971010677e-07,
|
43989 |
+
"loss": 0.9588,
|
43990 |
+
"step": 6283
|
43991 |
+
},
|
43992 |
+
{
|
43993 |
+
"epoch": 0.9478129713423831,
|
43994 |
+
"grad_norm": 2.0257952213287354,
|
43995 |
+
"learning_rate": 6.911344232511496e-07,
|
43996 |
+
"loss": 1.0368,
|
43997 |
+
"step": 6284
|
43998 |
+
},
|
43999 |
+
{
|
44000 |
+
"epoch": 0.9479638009049773,
|
44001 |
+
"grad_norm": 1.8290969133377075,
|
44002 |
+
"learning_rate": 6.871543623636012e-07,
|
44003 |
+
"loss": 0.9618,
|
44004 |
+
"step": 6285
|
44005 |
+
},
|
44006 |
+
{
|
44007 |
+
"epoch": 0.9481146304675716,
|
44008 |
+
"grad_norm": 1.9377784729003906,
|
44009 |
+
"learning_rate": 6.831857153596521e-07,
|
44010 |
+
"loss": 1.1191,
|
44011 |
+
"step": 6286
|
44012 |
+
},
|
44013 |
+
{
|
44014 |
+
"epoch": 0.9482654600301659,
|
44015 |
+
"grad_norm": 2.043879270553589,
|
44016 |
+
"learning_rate": 6.792284831578733e-07,
|
44017 |
+
"loss": 1.1668,
|
44018 |
+
"step": 6287
|
44019 |
+
},
|
44020 |
+
{
|
44021 |
+
"epoch": 0.9484162895927601,
|
44022 |
+
"grad_norm": 1.8056532144546509,
|
44023 |
+
"learning_rate": 6.752826666742041e-07,
|
44024 |
+
"loss": 0.7392,
|
44025 |
+
"step": 6288
|
44026 |
+
},
|
44027 |
+
{
|
44028 |
+
"epoch": 0.9485671191553544,
|
44029 |
+
"grad_norm": 2.201486349105835,
|
44030 |
+
"learning_rate": 6.713482668219362e-07,
|
44031 |
+
"loss": 1.4299,
|
44032 |
+
"step": 6289
|
44033 |
+
},
|
44034 |
+
{
|
44035 |
+
"epoch": 0.9487179487179487,
|
44036 |
+
"grad_norm": 2.0214931964874268,
|
44037 |
+
"learning_rate": 6.674252845117191e-07,
|
44038 |
+
"loss": 1.1641,
|
44039 |
+
"step": 6290
|
44040 |
+
},
|
44041 |
+
{
|
44042 |
+
"epoch": 0.948868778280543,
|
44043 |
+
"grad_norm": 2.255383253097534,
|
44044 |
+
"learning_rate": 6.63513720651554e-07,
|
44045 |
+
"loss": 1.1026,
|
44046 |
+
"step": 6291
|
44047 |
+
},
|
44048 |
+
{
|
44049 |
+
"epoch": 0.9490196078431372,
|
44050 |
+
"grad_norm": 2.57189679145813,
|
44051 |
+
"learning_rate": 6.596135761468225e-07,
|
44052 |
+
"loss": 1.478,
|
44053 |
+
"step": 6292
|
44054 |
+
},
|
44055 |
+
{
|
44056 |
+
"epoch": 0.9491704374057315,
|
44057 |
+
"grad_norm": 2.4986143112182617,
|
44058 |
+
"learning_rate": 6.557248519002302e-07,
|
44059 |
+
"loss": 1.6896,
|
44060 |
+
"step": 6293
|
44061 |
+
},
|
44062 |
+
{
|
44063 |
+
"epoch": 0.9493212669683257,
|
44064 |
+
"grad_norm": 2.127760410308838,
|
44065 |
+
"learning_rate": 6.51847548811868e-07,
|
44066 |
+
"loss": 1.3076,
|
44067 |
+
"step": 6294
|
44068 |
+
},
|
44069 |
+
{
|
44070 |
+
"epoch": 0.94947209653092,
|
44071 |
+
"grad_norm": 2.108933925628662,
|
44072 |
+
"learning_rate": 6.479816677791683e-07,
|
44073 |
+
"loss": 1.0977,
|
44074 |
+
"step": 6295
|
44075 |
+
},
|
44076 |
+
{
|
44077 |
+
"epoch": 0.9496229260935143,
|
44078 |
+
"grad_norm": 1.5710909366607666,
|
44079 |
+
"learning_rate": 6.441272096969153e-07,
|
44080 |
+
"loss": 0.6389,
|
44081 |
+
"step": 6296
|
44082 |
+
},
|
44083 |
+
{
|
44084 |
+
"epoch": 0.9497737556561086,
|
44085 |
+
"grad_norm": 1.3058656454086304,
|
44086 |
+
"learning_rate": 6.402841754572675e-07,
|
44087 |
+
"loss": 0.5123,
|
44088 |
+
"step": 6297
|
44089 |
+
},
|
44090 |
+
{
|
44091 |
+
"epoch": 0.9499245852187028,
|
44092 |
+
"grad_norm": 1.7123138904571533,
|
44093 |
+
"learning_rate": 6.364525659497189e-07,
|
44094 |
+
"loss": 0.7761,
|
44095 |
+
"step": 6298
|
44096 |
+
},
|
44097 |
+
{
|
44098 |
+
"epoch": 0.9500754147812971,
|
44099 |
+
"grad_norm": 1.6542720794677734,
|
44100 |
+
"learning_rate": 6.326323820611379e-07,
|
44101 |
+
"loss": 0.7791,
|
44102 |
+
"step": 6299
|
44103 |
+
},
|
44104 |
+
{
|
44105 |
+
"epoch": 0.9502262443438914,
|
44106 |
+
"grad_norm": 2.116077184677124,
|
44107 |
+
"learning_rate": 6.288236246757284e-07,
|
44108 |
+
"loss": 1.0661,
|
44109 |
+
"step": 6300
|
44110 |
+
},
|
44111 |
+
{
|
44112 |
+
"epoch": 0.9503770739064856,
|
44113 |
+
"grad_norm": 1.7709579467773438,
|
44114 |
+
"learning_rate": 6.250262946750685e-07,
|
44115 |
+
"loss": 1.0497,
|
44116 |
+
"step": 6301
|
44117 |
+
},
|
44118 |
+
{
|
44119 |
+
"epoch": 0.9505279034690799,
|
44120 |
+
"grad_norm": 1.7317759990692139,
|
44121 |
+
"learning_rate": 6.212403929380772e-07,
|
44122 |
+
"loss": 1.007,
|
44123 |
+
"step": 6302
|
44124 |
+
},
|
44125 |
+
{
|
44126 |
+
"epoch": 0.9506787330316742,
|
44127 |
+
"grad_norm": 1.5356085300445557,
|
44128 |
+
"learning_rate": 6.174659203410371e-07,
|
44129 |
+
"loss": 0.8161,
|
44130 |
+
"step": 6303
|
44131 |
+
},
|
44132 |
+
{
|
44133 |
+
"epoch": 0.9508295625942684,
|
44134 |
+
"grad_norm": 1.9997459650039673,
|
44135 |
+
"learning_rate": 6.137028777575826e-07,
|
44136 |
+
"loss": 1.3262,
|
44137 |
+
"step": 6304
|
44138 |
+
},
|
44139 |
+
{
|
44140 |
+
"epoch": 0.9509803921568627,
|
44141 |
+
"grad_norm": 1.7967150211334229,
|
44142 |
+
"learning_rate": 6.099512660587059e-07,
|
44143 |
+
"loss": 1.1945,
|
44144 |
+
"step": 6305
|
44145 |
+
},
|
44146 |
+
{
|
44147 |
+
"epoch": 0.951131221719457,
|
44148 |
+
"grad_norm": 1.8450052738189697,
|
44149 |
+
"learning_rate": 6.062110861127402e-07,
|
44150 |
+
"loss": 1.1252,
|
44151 |
+
"step": 6306
|
44152 |
+
},
|
44153 |
+
{
|
44154 |
+
"epoch": 0.9512820512820512,
|
44155 |
+
"grad_norm": 1.8019013404846191,
|
44156 |
+
"learning_rate": 6.024823387853928e-07,
|
44157 |
+
"loss": 0.8986,
|
44158 |
+
"step": 6307
|
44159 |
+
},
|
44160 |
+
{
|
44161 |
+
"epoch": 0.9514328808446455,
|
44162 |
+
"grad_norm": 1.6633667945861816,
|
44163 |
+
"learning_rate": 5.987650249397125e-07,
|
44164 |
+
"loss": 0.9361,
|
44165 |
+
"step": 6308
|
44166 |
+
},
|
44167 |
+
{
|
44168 |
+
"epoch": 0.9515837104072398,
|
44169 |
+
"grad_norm": 1.9914895296096802,
|
44170 |
+
"learning_rate": 5.950591454360943e-07,
|
44171 |
+
"loss": 1.2805,
|
44172 |
+
"step": 6309
|
44173 |
+
},
|
44174 |
+
{
|
44175 |
+
"epoch": 0.951734539969834,
|
44176 |
+
"grad_norm": 1.905820608139038,
|
44177 |
+
"learning_rate": 5.913647011323075e-07,
|
44178 |
+
"loss": 1.1174,
|
44179 |
+
"step": 6310
|
44180 |
+
},
|
44181 |
+
{
|
44182 |
+
"epoch": 0.9518853695324283,
|
44183 |
+
"grad_norm": 1.832431435585022,
|
44184 |
+
"learning_rate": 5.876816928834572e-07,
|
44185 |
+
"loss": 1.1125,
|
44186 |
+
"step": 6311
|
44187 |
+
},
|
44188 |
+
{
|
44189 |
+
"epoch": 0.9520361990950226,
|
44190 |
+
"grad_norm": 1.762558102607727,
|
44191 |
+
"learning_rate": 5.840101215420057e-07,
|
44192 |
+
"loss": 1.0359,
|
44193 |
+
"step": 6312
|
44194 |
+
},
|
44195 |
+
{
|
44196 |
+
"epoch": 0.9521870286576168,
|
44197 |
+
"grad_norm": 1.7604293823242188,
|
44198 |
+
"learning_rate": 5.803499879577734e-07,
|
44199 |
+
"loss": 1.0938,
|
44200 |
+
"step": 6313
|
44201 |
+
},
|
44202 |
+
{
|
44203 |
+
"epoch": 0.9523378582202111,
|
44204 |
+
"grad_norm": 2.041506767272949,
|
44205 |
+
"learning_rate": 5.767012929779325e-07,
|
44206 |
+
"loss": 1.0922,
|
44207 |
+
"step": 6314
|
44208 |
+
},
|
44209 |
+
{
|
44210 |
+
"epoch": 0.9524886877828054,
|
44211 |
+
"grad_norm": 1.8552873134613037,
|
44212 |
+
"learning_rate": 5.73064037447002e-07,
|
44213 |
+
"loss": 0.9393,
|
44214 |
+
"step": 6315
|
44215 |
+
},
|
44216 |
+
{
|
44217 |
+
"epoch": 0.9526395173453996,
|
44218 |
+
"grad_norm": 1.5460180044174194,
|
44219 |
+
"learning_rate": 5.694382222068528e-07,
|
44220 |
+
"loss": 0.8098,
|
44221 |
+
"step": 6316
|
44222 |
+
},
|
44223 |
+
{
|
44224 |
+
"epoch": 0.9527903469079939,
|
44225 |
+
"grad_norm": 1.542466640472412,
|
44226 |
+
"learning_rate": 5.658238480967137e-07,
|
44227 |
+
"loss": 0.7045,
|
44228 |
+
"step": 6317
|
44229 |
+
},
|
44230 |
+
{
|
44231 |
+
"epoch": 0.9529411764705882,
|
44232 |
+
"grad_norm": 1.8291473388671875,
|
44233 |
+
"learning_rate": 5.622209159531655e-07,
|
44234 |
+
"loss": 1.1387,
|
44235 |
+
"step": 6318
|
44236 |
+
},
|
44237 |
+
{
|
44238 |
+
"epoch": 0.9530920060331824,
|
44239 |
+
"grad_norm": 1.950031042098999,
|
44240 |
+
"learning_rate": 5.586294266101355e-07,
|
44241 |
+
"loss": 1.0013,
|
44242 |
+
"step": 6319
|
44243 |
+
},
|
44244 |
+
{
|
44245 |
+
"epoch": 0.9532428355957768,
|
44246 |
+
"grad_norm": 1.5500978231430054,
|
44247 |
+
"learning_rate": 5.550493808989032e-07,
|
44248 |
+
"loss": 0.873,
|
44249 |
+
"step": 6320
|
44250 |
+
},
|
44251 |
+
{
|
44252 |
+
"epoch": 0.9533936651583711,
|
44253 |
+
"grad_norm": 2.1455116271972656,
|
44254 |
+
"learning_rate": 5.514807796481003e-07,
|
44255 |
+
"loss": 1.3643,
|
44256 |
+
"step": 6321
|
44257 |
+
},
|
44258 |
+
{
|
44259 |
+
"epoch": 0.9535444947209654,
|
44260 |
+
"grad_norm": 1.77354896068573,
|
44261 |
+
"learning_rate": 5.47923623683716e-07,
|
44262 |
+
"loss": 0.8939,
|
44263 |
+
"step": 6322
|
44264 |
+
},
|
44265 |
+
{
|
44266 |
+
"epoch": 0.9536953242835596,
|
44267 |
+
"grad_norm": 1.7942193746566772,
|
44268 |
+
"learning_rate": 5.443779138290806e-07,
|
44269 |
+
"loss": 0.8917,
|
44270 |
+
"step": 6323
|
44271 |
+
},
|
44272 |
+
{
|
44273 |
+
"epoch": 0.9538461538461539,
|
44274 |
+
"grad_norm": 1.7935692071914673,
|
44275 |
+
"learning_rate": 5.408436509048819e-07,
|
44276 |
+
"loss": 0.839,
|
44277 |
+
"step": 6324
|
44278 |
+
},
|
44279 |
+
{
|
44280 |
+
"epoch": 0.9539969834087482,
|
44281 |
+
"grad_norm": 1.6746853590011597,
|
44282 |
+
"learning_rate": 5.373208357291493e-07,
|
44283 |
+
"loss": 0.7705,
|
44284 |
+
"step": 6325
|
44285 |
+
},
|
44286 |
+
{
|
44287 |
+
"epoch": 0.9541478129713424,
|
44288 |
+
"grad_norm": 1.8481478691101074,
|
44289 |
+
"learning_rate": 5.338094691172801e-07,
|
44290 |
+
"loss": 0.925,
|
44291 |
+
"step": 6326
|
44292 |
+
},
|
44293 |
+
{
|
44294 |
+
"epoch": 0.9542986425339367,
|
44295 |
+
"grad_norm": 1.7488939762115479,
|
44296 |
+
"learning_rate": 5.303095518819967e-07,
|
44297 |
+
"loss": 0.8289,
|
44298 |
+
"step": 6327
|
44299 |
+
},
|
44300 |
+
{
|
44301 |
+
"epoch": 0.954449472096531,
|
44302 |
+
"grad_norm": 2.2121334075927734,
|
44303 |
+
"learning_rate": 5.268210848333954e-07,
|
44304 |
+
"loss": 1.4356,
|
44305 |
+
"step": 6328
|
44306 |
+
},
|
44307 |
+
{
|
44308 |
+
"epoch": 0.9546003016591252,
|
44309 |
+
"grad_norm": 1.872408151626587,
|
44310 |
+
"learning_rate": 5.233440687789082e-07,
|
44311 |
+
"loss": 0.9361,
|
44312 |
+
"step": 6329
|
44313 |
+
},
|
44314 |
+
{
|
44315 |
+
"epoch": 0.9547511312217195,
|
44316 |
+
"grad_norm": 2.0881173610687256,
|
44317 |
+
"learning_rate": 5.198785045233245e-07,
|
44318 |
+
"loss": 1.451,
|
44319 |
+
"step": 6330
|
44320 |
+
},
|
44321 |
+
{
|
44322 |
+
"epoch": 0.9549019607843138,
|
44323 |
+
"grad_norm": 1.886342167854309,
|
44324 |
+
"learning_rate": 5.164243928687695e-07,
|
44325 |
+
"loss": 0.8034,
|
44326 |
+
"step": 6331
|
44327 |
+
},
|
44328 |
+
{
|
44329 |
+
"epoch": 0.955052790346908,
|
44330 |
+
"grad_norm": 2.0379743576049805,
|
44331 |
+
"learning_rate": 5.129817346147369e-07,
|
44332 |
+
"loss": 1.2724,
|
44333 |
+
"step": 6332
|
44334 |
+
},
|
44335 |
+
{
|
44336 |
+
"epoch": 0.9552036199095023,
|
44337 |
+
"grad_norm": 1.8097858428955078,
|
44338 |
+
"learning_rate": 5.095505305580561e-07,
|
44339 |
+
"loss": 0.9383,
|
44340 |
+
"step": 6333
|
44341 |
+
},
|
44342 |
+
{
|
44343 |
+
"epoch": 0.9553544494720966,
|
44344 |
+
"grad_norm": 1.8055602312088013,
|
44345 |
+
"learning_rate": 5.061307814929028e-07,
|
44346 |
+
"loss": 0.9776,
|
44347 |
+
"step": 6334
|
44348 |
+
},
|
44349 |
+
{
|
44350 |
+
"epoch": 0.9555052790346908,
|
44351 |
+
"grad_norm": 1.750784993171692,
|
44352 |
+
"learning_rate": 5.027224882108216e-07,
|
44353 |
+
"loss": 0.6974,
|
44354 |
+
"step": 6335
|
44355 |
+
},
|
44356 |
+
{
|
44357 |
+
"epoch": 0.9556561085972851,
|
44358 |
+
"grad_norm": 1.8835489749908447,
|
44359 |
+
"learning_rate": 4.993256515006817e-07,
|
44360 |
+
"loss": 0.9747,
|
44361 |
+
"step": 6336
|
44362 |
+
},
|
44363 |
+
{
|
44364 |
+
"epoch": 0.9558069381598794,
|
44365 |
+
"grad_norm": 2.237809658050537,
|
44366 |
+
"learning_rate": 4.959402721487094e-07,
|
44367 |
+
"loss": 1.1552,
|
44368 |
+
"step": 6337
|
44369 |
+
},
|
44370 |
+
{
|
44371 |
+
"epoch": 0.9559577677224737,
|
44372 |
+
"grad_norm": 2.2509021759033203,
|
44373 |
+
"learning_rate": 4.925663509384782e-07,
|
44374 |
+
"loss": 1.2622,
|
44375 |
+
"step": 6338
|
44376 |
+
},
|
44377 |
+
{
|
44378 |
+
"epoch": 0.9561085972850679,
|
44379 |
+
"grad_norm": 1.9245855808258057,
|
44380 |
+
"learning_rate": 4.892038886509242e-07,
|
44381 |
+
"loss": 0.9188,
|
44382 |
+
"step": 6339
|
44383 |
+
},
|
44384 |
+
{
|
44385 |
+
"epoch": 0.9562594268476622,
|
44386 |
+
"grad_norm": 2.145233392715454,
|
44387 |
+
"learning_rate": 4.85852886064303e-07,
|
44388 |
+
"loss": 1.0986,
|
44389 |
+
"step": 6340
|
44390 |
+
},
|
44391 |
+
{
|
44392 |
+
"epoch": 0.9564102564102565,
|
44393 |
+
"grad_norm": 1.804188847541809,
|
44394 |
+
"learning_rate": 4.825133439542385e-07,
|
44395 |
+
"loss": 0.923,
|
44396 |
+
"step": 6341
|
44397 |
+
},
|
44398 |
+
{
|
44399 |
+
"epoch": 0.9565610859728507,
|
44400 |
+
"grad_norm": 1.800316572189331,
|
44401 |
+
"learning_rate": 4.79185263093701e-07,
|
44402 |
+
"loss": 0.9414,
|
44403 |
+
"step": 6342
|
44404 |
+
},
|
44405 |
+
{
|
44406 |
+
"epoch": 0.956711915535445,
|
44407 |
+
"grad_norm": 2.021705389022827,
|
44408 |
+
"learning_rate": 4.758686442529969e-07,
|
44409 |
+
"loss": 0.9034,
|
44410 |
+
"step": 6343
|
44411 |
+
},
|
44412 |
+
{
|
44413 |
+
"epoch": 0.9568627450980393,
|
44414 |
+
"grad_norm": 1.8886138200759888,
|
44415 |
+
"learning_rate": 4.725634881997898e-07,
|
44416 |
+
"loss": 0.9878,
|
44417 |
+
"step": 6344
|
44418 |
+
},
|
44419 |
+
{
|
44420 |
+
"epoch": 0.9570135746606335,
|
44421 |
+
"grad_norm": 2.104884386062622,
|
44422 |
+
"learning_rate": 4.692697956990899e-07,
|
44423 |
+
"loss": 1.074,
|
44424 |
+
"step": 6345
|
44425 |
+
},
|
44426 |
+
{
|
44427 |
+
"epoch": 0.9571644042232278,
|
44428 |
+
"grad_norm": 2.120182991027832,
|
44429 |
+
"learning_rate": 4.6598756751324857e-07,
|
44430 |
+
"loss": 0.9175,
|
44431 |
+
"step": 6346
|
44432 |
+
},
|
44433 |
+
{
|
44434 |
+
"epoch": 0.9573152337858221,
|
44435 |
+
"grad_norm": 1.8088730573654175,
|
44436 |
+
"learning_rate": 4.6271680440195806e-07,
|
44437 |
+
"loss": 0.8787,
|
44438 |
+
"step": 6347
|
44439 |
+
},
|
44440 |
+
{
|
44441 |
+
"epoch": 0.9574660633484163,
|
44442 |
+
"grad_norm": 1.7701196670532227,
|
44443 |
+
"learning_rate": 4.59457507122274e-07,
|
44444 |
+
"loss": 0.7213,
|
44445 |
+
"step": 6348
|
44446 |
+
},
|
44447 |
+
{
|
44448 |
+
"epoch": 0.9576168929110106,
|
44449 |
+
"grad_norm": 2.178802967071533,
|
44450 |
+
"learning_rate": 4.562096764285817e-07,
|
44451 |
+
"loss": 0.9971,
|
44452 |
+
"step": 6349
|
44453 |
+
},
|
44454 |
+
{
|
44455 |
+
"epoch": 0.9577677224736049,
|
44456 |
+
"grad_norm": 1.5018692016601562,
|
44457 |
+
"learning_rate": 4.529733130726299e-07,
|
44458 |
+
"loss": 0.5859,
|
44459 |
+
"step": 6350
|
44460 |
+
},
|
44461 |
+
{
|
44462 |
+
"epoch": 0.9579185520361991,
|
44463 |
+
"grad_norm": 1.998622179031372,
|
44464 |
+
"learning_rate": 4.4974841780349163e-07,
|
44465 |
+
"loss": 1.3461,
|
44466 |
+
"step": 6351
|
44467 |
+
},
|
44468 |
+
{
|
44469 |
+
"epoch": 0.9580693815987934,
|
44470 |
+
"grad_norm": 1.9802663326263428,
|
44471 |
+
"learning_rate": 4.465349913676031e-07,
|
44472 |
+
"loss": 1.187,
|
44473 |
+
"step": 6352
|
44474 |
+
},
|
44475 |
+
{
|
44476 |
+
"epoch": 0.9582202111613877,
|
44477 |
+
"grad_norm": 1.7344826459884644,
|
44478 |
+
"learning_rate": 4.4333303450873607e-07,
|
44479 |
+
"loss": 0.9543,
|
44480 |
+
"step": 6353
|
44481 |
+
},
|
44482 |
+
{
|
44483 |
+
"epoch": 0.9583710407239819,
|
44484 |
+
"grad_norm": 1.9702990055084229,
|
44485 |
+
"learning_rate": 4.401425479680199e-07,
|
44486 |
+
"loss": 1.2531,
|
44487 |
+
"step": 6354
|
44488 |
+
},
|
44489 |
+
{
|
44490 |
+
"epoch": 0.9585218702865762,
|
44491 |
+
"grad_norm": 2.1435110569000244,
|
44492 |
+
"learning_rate": 4.369635324839083e-07,
|
44493 |
+
"loss": 1.2078,
|
44494 |
+
"step": 6355
|
44495 |
+
},
|
44496 |
+
{
|
44497 |
+
"epoch": 0.9586726998491705,
|
44498 |
+
"grad_norm": 1.9066894054412842,
|
44499 |
+
"learning_rate": 4.3379598879221825e-07,
|
44500 |
+
"loss": 1.3334,
|
44501 |
+
"step": 6356
|
44502 |
+
},
|
44503 |
+
{
|
44504 |
+
"epoch": 0.9588235294117647,
|
44505 |
+
"grad_norm": 1.7345263957977295,
|
44506 |
+
"learning_rate": 4.306399176261022e-07,
|
44507 |
+
"loss": 0.9146,
|
44508 |
+
"step": 6357
|
44509 |
+
},
|
44510 |
+
{
|
44511 |
+
"epoch": 0.958974358974359,
|
44512 |
+
"grad_norm": 1.7452151775360107,
|
44513 |
+
"learning_rate": 4.274953197160647e-07,
|
44514 |
+
"loss": 1.0763,
|
44515 |
+
"step": 6358
|
44516 |
+
},
|
44517 |
+
{
|
44518 |
+
"epoch": 0.9591251885369533,
|
44519 |
+
"grad_norm": 1.8526966571807861,
|
44520 |
+
"learning_rate": 4.243621957899457e-07,
|
44521 |
+
"loss": 1.201,
|
44522 |
+
"step": 6359
|
44523 |
+
},
|
44524 |
+
{
|
44525 |
+
"epoch": 0.9592760180995475,
|
44526 |
+
"grad_norm": 1.969795823097229,
|
44527 |
+
"learning_rate": 4.2124054657293187e-07,
|
44528 |
+
"loss": 1.0364,
|
44529 |
+
"step": 6360
|
44530 |
+
},
|
44531 |
+
{
|
44532 |
+
"epoch": 0.9594268476621418,
|
44533 |
+
"grad_norm": 1.8643343448638916,
|
44534 |
+
"learning_rate": 4.1813037278756184e-07,
|
44535 |
+
"loss": 0.9283,
|
44536 |
+
"step": 6361
|
44537 |
+
},
|
44538 |
+
{
|
44539 |
+
"epoch": 0.9595776772247361,
|
44540 |
+
"grad_norm": 1.9766918420791626,
|
44541 |
+
"learning_rate": 4.1503167515370976e-07,
|
44542 |
+
"loss": 1.1157,
|
44543 |
+
"step": 6362
|
44544 |
+
},
|
44545 |
+
{
|
44546 |
+
"epoch": 0.9597285067873303,
|
44547 |
+
"grad_norm": 1.9669005870819092,
|
44548 |
+
"learning_rate": 4.119444543885964e-07,
|
44549 |
+
"loss": 1.1146,
|
44550 |
+
"step": 6363
|
44551 |
+
},
|
44552 |
+
{
|
44553 |
+
"epoch": 0.9598793363499246,
|
44554 |
+
"grad_norm": 1.802460789680481,
|
44555 |
+
"learning_rate": 4.0886871120678903e-07,
|
44556 |
+
"loss": 1.0177,
|
44557 |
+
"step": 6364
|
44558 |
+
},
|
44559 |
+
{
|
44560 |
+
"epoch": 0.9600301659125189,
|
44561 |
+
"grad_norm": 1.8280365467071533,
|
44562 |
+
"learning_rate": 4.058044463201849e-07,
|
44563 |
+
"loss": 1.0281,
|
44564 |
+
"step": 6365
|
44565 |
+
},
|
44566 |
+
{
|
44567 |
+
"epoch": 0.9601809954751132,
|
44568 |
+
"grad_norm": 1.9677125215530396,
|
44569 |
+
"learning_rate": 4.027516604380388e-07,
|
44570 |
+
"loss": 1.0923,
|
44571 |
+
"step": 6366
|
44572 |
+
},
|
44573 |
+
{
|
44574 |
+
"epoch": 0.9603318250377074,
|
44575 |
+
"grad_norm": 2.0572264194488525,
|
44576 |
+
"learning_rate": 3.9971035426695226e-07,
|
44577 |
+
"loss": 1.2239,
|
44578 |
+
"step": 6367
|
44579 |
+
},
|
44580 |
+
{
|
44581 |
+
"epoch": 0.9604826546003017,
|
44582 |
+
"grad_norm": 2.10935115814209,
|
44583 |
+
"learning_rate": 3.96680528510851e-07,
|
44584 |
+
"loss": 1.2413,
|
44585 |
+
"step": 6368
|
44586 |
+
},
|
44587 |
+
{
|
44588 |
+
"epoch": 0.960633484162896,
|
44589 |
+
"grad_norm": 1.6900520324707031,
|
44590 |
+
"learning_rate": 3.936621838710186e-07,
|
44591 |
+
"loss": 0.8827,
|
44592 |
+
"step": 6369
|
44593 |
+
},
|
44594 |
+
{
|
44595 |
+
"epoch": 0.9607843137254902,
|
44596 |
+
"grad_norm": 2.0149905681610107,
|
44597 |
+
"learning_rate": 3.906553210460795e-07,
|
44598 |
+
"loss": 0.9325,
|
44599 |
+
"step": 6370
|
44600 |
+
},
|
44601 |
+
{
|
44602 |
+
"epoch": 0.9609351432880845,
|
44603 |
+
"grad_norm": 1.7919515371322632,
|
44604 |
+
"learning_rate": 3.8765994073199366e-07,
|
44605 |
+
"loss": 0.8165,
|
44606 |
+
"step": 6371
|
44607 |
+
},
|
44608 |
+
{
|
44609 |
+
"epoch": 0.9610859728506788,
|
44610 |
+
"grad_norm": 1.882252812385559,
|
44611 |
+
"learning_rate": 3.8467604362206753e-07,
|
44612 |
+
"loss": 1.0458,
|
44613 |
+
"step": 6372
|
44614 |
+
},
|
44615 |
+
{
|
44616 |
+
"epoch": 0.961236802413273,
|
44617 |
+
"grad_norm": 1.703015923500061,
|
44618 |
+
"learning_rate": 3.8170363040695435e-07,
|
44619 |
+
"loss": 0.8059,
|
44620 |
+
"step": 6373
|
44621 |
+
},
|
44622 |
+
{
|
44623 |
+
"epoch": 0.9613876319758673,
|
44624 |
+
"grad_norm": 1.6983740329742432,
|
44625 |
+
"learning_rate": 3.787427017746481e-07,
|
44626 |
+
"loss": 0.7853,
|
44627 |
+
"step": 6374
|
44628 |
+
},
|
44629 |
+
{
|
44630 |
+
"epoch": 0.9615384615384616,
|
44631 |
+
"grad_norm": 1.5398797988891602,
|
44632 |
+
"learning_rate": 3.757932584104673e-07,
|
44633 |
+
"loss": 0.6919,
|
44634 |
+
"step": 6375
|
44635 |
+
},
|
44636 |
+
{
|
44637 |
+
"epoch": 0.9616892911010558,
|
44638 |
+
"grad_norm": 1.4278013706207275,
|
44639 |
+
"learning_rate": 3.728553009970992e-07,
|
44640 |
+
"loss": 0.638,
|
44641 |
+
"step": 6376
|
44642 |
+
},
|
44643 |
+
{
|
44644 |
+
"epoch": 0.9618401206636501,
|
44645 |
+
"grad_norm": 1.8496487140655518,
|
44646 |
+
"learning_rate": 3.6992883021455537e-07,
|
44647 |
+
"loss": 0.8303,
|
44648 |
+
"step": 6377
|
44649 |
+
},
|
44650 |
+
{
|
44651 |
+
"epoch": 0.9619909502262444,
|
44652 |
+
"grad_norm": 2.1084578037261963,
|
44653 |
+
"learning_rate": 3.6701384674018845e-07,
|
44654 |
+
"loss": 1.1322,
|
44655 |
+
"step": 6378
|
44656 |
+
},
|
44657 |
+
{
|
44658 |
+
"epoch": 0.9621417797888386,
|
44659 |
+
"grad_norm": 1.3949884176254272,
|
44660 |
+
"learning_rate": 3.6411035124870317e-07,
|
44661 |
+
"loss": 0.6075,
|
44662 |
+
"step": 6379
|
44663 |
+
},
|
44664 |
+
{
|
44665 |
+
"epoch": 0.9622926093514329,
|
44666 |
+
"grad_norm": 1.922567367553711,
|
44667 |
+
"learning_rate": 3.612183444121342e-07,
|
44668 |
+
"loss": 1.0072,
|
44669 |
+
"step": 6380
|
44670 |
+
},
|
44671 |
+
{
|
44672 |
+
"epoch": 0.9624434389140272,
|
44673 |
+
"grad_norm": 1.9455474615097046,
|
44674 |
+
"learning_rate": 3.583378268998683e-07,
|
44675 |
+
"loss": 0.8225,
|
44676 |
+
"step": 6381
|
44677 |
+
},
|
44678 |
+
{
|
44679 |
+
"epoch": 0.9625942684766214,
|
44680 |
+
"grad_norm": 2.082988739013672,
|
44681 |
+
"learning_rate": 3.55468799378611e-07,
|
44682 |
+
"loss": 1.2573,
|
44683 |
+
"step": 6382
|
44684 |
+
},
|
44685 |
+
{
|
44686 |
+
"epoch": 0.9627450980392157,
|
44687 |
+
"grad_norm": 1.762777328491211,
|
44688 |
+
"learning_rate": 3.5261126251244223e-07,
|
44689 |
+
"loss": 0.7789,
|
44690 |
+
"step": 6383
|
44691 |
+
},
|
44692 |
+
{
|
44693 |
+
"epoch": 0.96289592760181,
|
44694 |
+
"grad_norm": 2.2491986751556396,
|
44695 |
+
"learning_rate": 3.4976521696274966e-07,
|
44696 |
+
"loss": 1.1503,
|
44697 |
+
"step": 6384
|
44698 |
+
},
|
44699 |
+
{
|
44700 |
+
"epoch": 0.9630467571644042,
|
44701 |
+
"grad_norm": 2.2879385948181152,
|
44702 |
+
"learning_rate": 3.4693066338828405e-07,
|
44703 |
+
"loss": 1.1606,
|
44704 |
+
"step": 6385
|
44705 |
+
},
|
44706 |
+
{
|
44707 |
+
"epoch": 0.9631975867269985,
|
44708 |
+
"grad_norm": 1.879817008972168,
|
44709 |
+
"learning_rate": 3.441076024451151e-07,
|
44710 |
+
"loss": 0.9548,
|
44711 |
+
"step": 6386
|
44712 |
+
},
|
44713 |
+
{
|
44714 |
+
"epoch": 0.9633484162895928,
|
44715 |
+
"grad_norm": 1.963283658027649,
|
44716 |
+
"learning_rate": 3.4129603478668114e-07,
|
44717 |
+
"loss": 1.0147,
|
44718 |
+
"step": 6387
|
44719 |
+
},
|
44720 |
+
{
|
44721 |
+
"epoch": 0.963499245852187,
|
44722 |
+
"grad_norm": 1.8836108446121216,
|
44723 |
+
"learning_rate": 3.384959610637284e-07,
|
44724 |
+
"loss": 0.9685,
|
44725 |
+
"step": 6388
|
44726 |
+
},
|
44727 |
+
{
|
44728 |
+
"epoch": 0.9636500754147813,
|
44729 |
+
"grad_norm": 2.1015186309814453,
|
44730 |
+
"learning_rate": 3.357073819243661e-07,
|
44731 |
+
"loss": 1.3716,
|
44732 |
+
"step": 6389
|
44733 |
+
},
|
44734 |
+
{
|
44735 |
+
"epoch": 0.9638009049773756,
|
44736 |
+
"grad_norm": 2.008430004119873,
|
44737 |
+
"learning_rate": 3.329302980140392e-07,
|
44738 |
+
"loss": 1.1204,
|
44739 |
+
"step": 6390
|
44740 |
+
},
|
44741 |
+
{
|
44742 |
+
"epoch": 0.9639517345399699,
|
44743 |
+
"grad_norm": 1.9310039281845093,
|
44744 |
+
"learning_rate": 3.3016470997551675e-07,
|
44745 |
+
"loss": 0.9668,
|
44746 |
+
"step": 6391
|
44747 |
+
},
|
44748 |
+
{
|
44749 |
+
"epoch": 0.9641025641025641,
|
44750 |
+
"grad_norm": 1.8051788806915283,
|
44751 |
+
"learning_rate": 3.274106184489312e-07,
|
44752 |
+
"loss": 0.8055,
|
44753 |
+
"step": 6392
|
44754 |
+
},
|
44755 |
+
{
|
44756 |
+
"epoch": 0.9642533936651584,
|
44757 |
+
"grad_norm": 1.9417747259140015,
|
44758 |
+
"learning_rate": 3.246680240717226e-07,
|
44759 |
+
"loss": 1.1606,
|
44760 |
+
"step": 6393
|
44761 |
+
},
|
44762 |
+
{
|
44763 |
+
"epoch": 0.9644042232277527,
|
44764 |
+
"grad_norm": 2.2688350677490234,
|
44765 |
+
"learning_rate": 3.219369274787054e-07,
|
44766 |
+
"loss": 1.18,
|
44767 |
+
"step": 6394
|
44768 |
+
},
|
44769 |
+
{
|
44770 |
+
"epoch": 0.9645550527903469,
|
44771 |
+
"grad_norm": 2.1807358264923096,
|
44772 |
+
"learning_rate": 3.1921732930200734e-07,
|
44773 |
+
"loss": 0.9813,
|
44774 |
+
"step": 6395
|
44775 |
+
},
|
44776 |
+
{
|
44777 |
+
"epoch": 0.9647058823529412,
|
44778 |
+
"grad_norm": 1.7102917432785034,
|
44779 |
+
"learning_rate": 3.1650923017110254e-07,
|
44780 |
+
"loss": 0.7327,
|
44781 |
+
"step": 6396
|
44782 |
+
},
|
44783 |
+
{
|
44784 |
+
"epoch": 0.9648567119155355,
|
44785 |
+
"grad_norm": 1.8029088973999023,
|
44786 |
+
"learning_rate": 3.1381263071280643e-07,
|
44787 |
+
"loss": 0.8494,
|
44788 |
+
"step": 6397
|
44789 |
+
},
|
44790 |
+
{
|
44791 |
+
"epoch": 0.9650075414781297,
|
44792 |
+
"grad_norm": 1.7781802415847778,
|
44793 |
+
"learning_rate": 3.1112753155126963e-07,
|
44794 |
+
"loss": 0.9113,
|
44795 |
+
"step": 6398
|
44796 |
+
},
|
44797 |
+
{
|
44798 |
+
"epoch": 0.965158371040724,
|
44799 |
+
"grad_norm": 1.8984512090682983,
|
44800 |
+
"learning_rate": 3.084539333079839e-07,
|
44801 |
+
"loss": 0.8357,
|
44802 |
+
"step": 6399
|
44803 |
+
},
|
44804 |
+
{
|
44805 |
+
"epoch": 0.9653092006033183,
|
44806 |
+
"grad_norm": 1.5885837078094482,
|
44807 |
+
"learning_rate": 3.0579183660177093e-07,
|
44808 |
+
"loss": 0.6474,
|
44809 |
+
"step": 6400
|
44810 |
+
},
|
44811 |
+
{
|
44812 |
+
"epoch": 0.9654600301659125,
|
44813 |
+
"grad_norm": 1.7469663619995117,
|
44814 |
+
"learning_rate": 3.0314124204880446e-07,
|
44815 |
+
"loss": 1.1415,
|
44816 |
+
"step": 6401
|
44817 |
+
},
|
44818 |
+
{
|
44819 |
+
"epoch": 0.9656108597285068,
|
44820 |
+
"grad_norm": 1.5996235609054565,
|
44821 |
+
"learning_rate": 3.0050215026257713e-07,
|
44822 |
+
"loss": 0.9154,
|
44823 |
+
"step": 6402
|
44824 |
+
},
|
44825 |
+
{
|
44826 |
+
"epoch": 0.9657616892911011,
|
44827 |
+
"grad_norm": 1.8189271688461304,
|
44828 |
+
"learning_rate": 2.978745618539336e-07,
|
44829 |
+
"loss": 1.0192,
|
44830 |
+
"step": 6403
|
44831 |
+
},
|
44832 |
+
{
|
44833 |
+
"epoch": 0.9659125188536953,
|
44834 |
+
"grad_norm": 1.7454121112823486,
|
44835 |
+
"learning_rate": 2.9525847743105405e-07,
|
44836 |
+
"loss": 1.0345,
|
44837 |
+
"step": 6404
|
44838 |
+
},
|
44839 |
+
{
|
44840 |
+
"epoch": 0.9660633484162896,
|
44841 |
+
"grad_norm": 1.6599011421203613,
|
44842 |
+
"learning_rate": 2.926538975994486e-07,
|
44843 |
+
"loss": 1.0556,
|
44844 |
+
"step": 6405
|
44845 |
+
},
|
44846 |
+
{
|
44847 |
+
"epoch": 0.9662141779788839,
|
44848 |
+
"grad_norm": 1.872502088546753,
|
44849 |
+
"learning_rate": 2.9006082296197946e-07,
|
44850 |
+
"loss": 1.0526,
|
44851 |
+
"step": 6406
|
44852 |
+
},
|
44853 |
+
{
|
44854 |
+
"epoch": 0.9663650075414781,
|
44855 |
+
"grad_norm": 1.8260844945907593,
|
44856 |
+
"learning_rate": 2.874792541188276e-07,
|
44857 |
+
"loss": 1.0962,
|
44858 |
+
"step": 6407
|
44859 |
+
},
|
44860 |
+
{
|
44861 |
+
"epoch": 0.9665158371040724,
|
44862 |
+
"grad_norm": 2.0990805625915527,
|
44863 |
+
"learning_rate": 2.849091916675206e-07,
|
44864 |
+
"loss": 1.28,
|
44865 |
+
"step": 6408
|
44866 |
+
},
|
44867 |
+
{
|
44868 |
+
"epoch": 0.9666666666666667,
|
44869 |
+
"grad_norm": 1.6568230390548706,
|
44870 |
+
"learning_rate": 2.8235063620292714e-07,
|
44871 |
+
"loss": 0.9448,
|
44872 |
+
"step": 6409
|
44873 |
+
},
|
44874 |
+
{
|
44875 |
+
"epoch": 0.9668174962292609,
|
44876 |
+
"grad_norm": 1.853630781173706,
|
44877 |
+
"learning_rate": 2.798035883172401e-07,
|
44878 |
+
"loss": 0.9706,
|
44879 |
+
"step": 6410
|
44880 |
+
},
|
44881 |
+
{
|
44882 |
+
"epoch": 0.9669683257918552,
|
44883 |
+
"grad_norm": 1.8414771556854248,
|
44884 |
+
"learning_rate": 2.7726804859999897e-07,
|
44885 |
+
"loss": 0.9215,
|
44886 |
+
"step": 6411
|
44887 |
+
},
|
44888 |
+
{
|
44889 |
+
"epoch": 0.9671191553544495,
|
44890 |
+
"grad_norm": 1.9662418365478516,
|
44891 |
+
"learning_rate": 2.7474401763807886e-07,
|
44892 |
+
"loss": 0.86,
|
44893 |
+
"step": 6412
|
44894 |
+
},
|
44895 |
+
{
|
44896 |
+
"epoch": 0.9672699849170437,
|
44897 |
+
"grad_norm": 1.7639135122299194,
|
44898 |
+
"learning_rate": 2.722314960156791e-07,
|
44899 |
+
"loss": 0.8866,
|
44900 |
+
"step": 6413
|
44901 |
+
},
|
44902 |
+
{
|
44903 |
+
"epoch": 0.967420814479638,
|
44904 |
+
"grad_norm": 1.749525785446167,
|
44905 |
+
"learning_rate": 2.6973048431435667e-07,
|
44906 |
+
"loss": 1.022,
|
44907 |
+
"step": 6414
|
44908 |
+
},
|
44909 |
+
{
|
44910 |
+
"epoch": 0.9675716440422323,
|
44911 |
+
"grad_norm": 1.6495894193649292,
|
44912 |
+
"learning_rate": 2.6724098311298183e-07,
|
44913 |
+
"loss": 0.7061,
|
44914 |
+
"step": 6415
|
44915 |
+
},
|
44916 |
+
{
|
44917 |
+
"epoch": 0.9677224736048265,
|
44918 |
+
"grad_norm": 1.8045361042022705,
|
44919 |
+
"learning_rate": 2.6476299298777705e-07,
|
44920 |
+
"loss": 1.0393,
|
44921 |
+
"step": 6416
|
44922 |
+
},
|
44923 |
+
{
|
44924 |
+
"epoch": 0.9678733031674208,
|
44925 |
+
"grad_norm": 1.9385876655578613,
|
44926 |
+
"learning_rate": 2.6229651451229465e-07,
|
44927 |
+
"loss": 1.0813,
|
44928 |
+
"step": 6417
|
44929 |
+
},
|
44930 |
+
{
|
44931 |
+
"epoch": 0.9680241327300151,
|
44932 |
+
"grad_norm": 2.213212490081787,
|
44933 |
+
"learning_rate": 2.5984154825742235e-07,
|
44934 |
+
"loss": 1.3767,
|
44935 |
+
"step": 6418
|
44936 |
+
},
|
44937 |
+
{
|
44938 |
+
"epoch": 0.9681749622926094,
|
44939 |
+
"grad_norm": 2.0189006328582764,
|
44940 |
+
"learning_rate": 2.573980947913779e-07,
|
44941 |
+
"loss": 1.4085,
|
44942 |
+
"step": 6419
|
44943 |
+
},
|
44944 |
+
{
|
44945 |
+
"epoch": 0.9683257918552036,
|
44946 |
+
"grad_norm": 2.1515588760375977,
|
44947 |
+
"learning_rate": 2.549661546797255e-07,
|
44948 |
+
"loss": 1.3382,
|
44949 |
+
"step": 6420
|
44950 |
+
},
|
44951 |
+
{
|
44952 |
+
"epoch": 0.9684766214177979,
|
44953 |
+
"grad_norm": 2.0453109741210938,
|
44954 |
+
"learning_rate": 2.5254572848535383e-07,
|
44955 |
+
"loss": 1.1791,
|
44956 |
+
"step": 6421
|
44957 |
+
},
|
44958 |
+
{
|
44959 |
+
"epoch": 0.9686274509803922,
|
44960 |
+
"grad_norm": 2.314772844314575,
|
44961 |
+
"learning_rate": 2.501368167684981e-07,
|
44962 |
+
"loss": 1.2573,
|
44963 |
+
"step": 6422
|
44964 |
+
},
|
44965 |
+
{
|
44966 |
+
"epoch": 0.9687782805429864,
|
44967 |
+
"grad_norm": 1.948198914527893,
|
44968 |
+
"learning_rate": 2.477394200867178e-07,
|
44969 |
+
"loss": 1.0669,
|
44970 |
+
"step": 6423
|
44971 |
+
},
|
44972 |
+
{
|
44973 |
+
"epoch": 0.9689291101055807,
|
44974 |
+
"grad_norm": 2.3818795680999756,
|
44975 |
+
"learning_rate": 2.45353538994908e-07,
|
44976 |
+
"loss": 1.5041,
|
44977 |
+
"step": 6424
|
44978 |
+
},
|
44979 |
+
{
|
44980 |
+
"epoch": 0.969079939668175,
|
44981 |
+
"grad_norm": 1.7617933750152588,
|
44982 |
+
"learning_rate": 2.429791740453102e-07,
|
44983 |
+
"loss": 0.9809,
|
44984 |
+
"step": 6425
|
44985 |
+
},
|
44986 |
+
{
|
44987 |
+
"epoch": 0.9692307692307692,
|
44988 |
+
"grad_norm": 1.5112937688827515,
|
44989 |
+
"learning_rate": 2.406163257874794e-07,
|
44990 |
+
"loss": 0.6922,
|
44991 |
+
"step": 6426
|
44992 |
+
},
|
44993 |
+
{
|
44994 |
+
"epoch": 0.9693815987933635,
|
44995 |
+
"grad_norm": 1.7321679592132568,
|
44996 |
+
"learning_rate": 2.3826499476832797e-07,
|
44997 |
+
"loss": 0.8135,
|
44998 |
+
"step": 6427
|
44999 |
+
},
|
45000 |
+
{
|
45001 |
+
"epoch": 0.9695324283559578,
|
45002 |
+
"grad_norm": 2.0587239265441895,
|
45003 |
+
"learning_rate": 2.3592518153208732e-07,
|
45004 |
+
"loss": 0.9306,
|
45005 |
+
"step": 6428
|
45006 |
+
},
|
45007 |
+
{
|
45008 |
+
"epoch": 0.969683257918552,
|
45009 |
+
"grad_norm": 2.0357539653778076,
|
45010 |
+
"learning_rate": 2.3359688662032975e-07,
|
45011 |
+
"loss": 1.1696,
|
45012 |
+
"step": 6429
|
45013 |
+
},
|
45014 |
+
{
|
45015 |
+
"epoch": 0.9698340874811463,
|
45016 |
+
"grad_norm": 1.9683305025100708,
|
45017 |
+
"learning_rate": 2.3128011057195753e-07,
|
45018 |
+
"loss": 1.0745,
|
45019 |
+
"step": 6430
|
45020 |
+
},
|
45021 |
+
{
|
45022 |
+
"epoch": 0.9699849170437406,
|
45023 |
+
"grad_norm": 2.4005391597747803,
|
45024 |
+
"learning_rate": 2.289748539232084e-07,
|
45025 |
+
"loss": 1.2111,
|
45026 |
+
"step": 6431
|
45027 |
+
},
|
45028 |
+
{
|
45029 |
+
"epoch": 0.9701357466063348,
|
45030 |
+
"grad_norm": 1.8384374380111694,
|
45031 |
+
"learning_rate": 2.2668111720764996e-07,
|
45032 |
+
"loss": 0.9713,
|
45033 |
+
"step": 6432
|
45034 |
+
},
|
45035 |
+
{
|
45036 |
+
"epoch": 0.9702865761689291,
|
45037 |
+
"grad_norm": 2.119652271270752,
|
45038 |
+
"learning_rate": 2.2439890095619641e-07,
|
45039 |
+
"loss": 1.1779,
|
45040 |
+
"step": 6433
|
45041 |
+
},
|
45042 |
+
{
|
45043 |
+
"epoch": 0.9704374057315234,
|
45044 |
+
"grad_norm": 2.0867671966552734,
|
45045 |
+
"learning_rate": 2.2212820569707526e-07,
|
45046 |
+
"loss": 1.1961,
|
45047 |
+
"step": 6434
|
45048 |
+
},
|
45049 |
+
{
|
45050 |
+
"epoch": 0.9705882352941176,
|
45051 |
+
"grad_norm": 1.7994745969772339,
|
45052 |
+
"learning_rate": 2.1986903195586607e-07,
|
45053 |
+
"loss": 0.9142,
|
45054 |
+
"step": 6435
|
45055 |
+
},
|
45056 |
+
{
|
45057 |
+
"epoch": 0.9707390648567119,
|
45058 |
+
"grad_norm": 2.197286367416382,
|
45059 |
+
"learning_rate": 2.1762138025547275e-07,
|
45060 |
+
"loss": 1.2425,
|
45061 |
+
"step": 6436
|
45062 |
+
},
|
45063 |
+
{
|
45064 |
+
"epoch": 0.9708898944193062,
|
45065 |
+
"grad_norm": 1.8814419507980347,
|
45066 |
+
"learning_rate": 2.1538525111613473e-07,
|
45067 |
+
"loss": 1.0905,
|
45068 |
+
"step": 6437
|
45069 |
+
},
|
45070 |
+
{
|
45071 |
+
"epoch": 0.9710407239819004,
|
45072 |
+
"grad_norm": 2.010801315307617,
|
45073 |
+
"learning_rate": 2.1316064505542133e-07,
|
45074 |
+
"loss": 1.0614,
|
45075 |
+
"step": 6438
|
45076 |
+
},
|
45077 |
+
{
|
45078 |
+
"epoch": 0.9711915535444947,
|
45079 |
+
"grad_norm": 1.5283746719360352,
|
45080 |
+
"learning_rate": 2.1094756258823735e-07,
|
45081 |
+
"loss": 0.6536,
|
45082 |
+
"step": 6439
|
45083 |
+
},
|
45084 |
+
{
|
45085 |
+
"epoch": 0.971342383107089,
|
45086 |
+
"grad_norm": 2.065852165222168,
|
45087 |
+
"learning_rate": 2.08746004226823e-07,
|
45088 |
+
"loss": 1.2922,
|
45089 |
+
"step": 6440
|
45090 |
+
},
|
45091 |
+
{
|
45092 |
+
"epoch": 0.9714932126696832,
|
45093 |
+
"grad_norm": 1.914737582206726,
|
45094 |
+
"learning_rate": 2.0655597048074293e-07,
|
45095 |
+
"loss": 1.1637,
|
45096 |
+
"step": 6441
|
45097 |
+
},
|
45098 |
+
{
|
45099 |
+
"epoch": 0.9716440422322775,
|
45100 |
+
"grad_norm": 2.0344982147216797,
|
45101 |
+
"learning_rate": 2.043774618568972e-07,
|
45102 |
+
"loss": 1.0028,
|
45103 |
+
"step": 6442
|
45104 |
+
},
|
45105 |
+
{
|
45106 |
+
"epoch": 0.9717948717948718,
|
45107 |
+
"grad_norm": 2.1902503967285156,
|
45108 |
+
"learning_rate": 2.0221047885953248e-07,
|
45109 |
+
"loss": 1.1506,
|
45110 |
+
"step": 6443
|
45111 |
+
},
|
45112 |
+
{
|
45113 |
+
"epoch": 0.971945701357466,
|
45114 |
+
"grad_norm": 2.2948451042175293,
|
45115 |
+
"learning_rate": 2.0005502199020309e-07,
|
45116 |
+
"loss": 1.2524,
|
45117 |
+
"step": 6444
|
45118 |
+
},
|
45119 |
+
{
|
45120 |
+
"epoch": 0.9720965309200603,
|
45121 |
+
"grad_norm": 2.206702709197998,
|
45122 |
+
"learning_rate": 1.9791109174780996e-07,
|
45123 |
+
"loss": 1.012,
|
45124 |
+
"step": 6445
|
45125 |
+
},
|
45126 |
+
{
|
45127 |
+
"epoch": 0.9722473604826546,
|
45128 |
+
"grad_norm": 1.6949716806411743,
|
45129 |
+
"learning_rate": 1.957786886285895e-07,
|
45130 |
+
"loss": 0.734,
|
45131 |
+
"step": 6446
|
45132 |
+
},
|
45133 |
+
{
|
45134 |
+
"epoch": 0.9723981900452489,
|
45135 |
+
"grad_norm": 1.4771981239318848,
|
45136 |
+
"learning_rate": 1.9365781312610244e-07,
|
45137 |
+
"loss": 0.6859,
|
45138 |
+
"step": 6447
|
45139 |
+
},
|
45140 |
+
{
|
45141 |
+
"epoch": 0.9725490196078431,
|
45142 |
+
"grad_norm": 1.7075660228729248,
|
45143 |
+
"learning_rate": 1.915484657312505e-07,
|
45144 |
+
"loss": 0.7901,
|
45145 |
+
"step": 6448
|
45146 |
+
},
|
45147 |
+
{
|
45148 |
+
"epoch": 0.9726998491704374,
|
45149 |
+
"grad_norm": 2.1574273109436035,
|
45150 |
+
"learning_rate": 1.8945064693224322e-07,
|
45151 |
+
"loss": 1.1075,
|
45152 |
+
"step": 6449
|
45153 |
+
},
|
45154 |
+
{
|
45155 |
+
"epoch": 0.9728506787330317,
|
45156 |
+
"grad_norm": 1.7101869583129883,
|
45157 |
+
"learning_rate": 1.873643572146533e-07,
|
45158 |
+
"loss": 0.8212,
|
45159 |
+
"step": 6450
|
45160 |
+
},
|
45161 |
+
{
|
45162 |
+
"epoch": 0.9730015082956259,
|
45163 |
+
"grad_norm": 2.248549461364746,
|
45164 |
+
"learning_rate": 1.8528959706136662e-07,
|
45165 |
+
"loss": 1.9165,
|
45166 |
+
"step": 6451
|
45167 |
+
},
|
45168 |
+
{
|
45169 |
+
"epoch": 0.9731523378582202,
|
45170 |
+
"grad_norm": 2.1564855575561523,
|
45171 |
+
"learning_rate": 1.8322636695260465e-07,
|
45172 |
+
"loss": 1.3283,
|
45173 |
+
"step": 6452
|
45174 |
+
},
|
45175 |
+
{
|
45176 |
+
"epoch": 0.9733031674208145,
|
45177 |
+
"grad_norm": 1.9545012712478638,
|
45178 |
+
"learning_rate": 1.811746673659187e-07,
|
45179 |
+
"loss": 1.0547,
|
45180 |
+
"step": 6453
|
45181 |
}
|
45182 |
],
|
45183 |
"logging_steps": 1,
|
|
|
45197 |
"attributes": {}
|
45198 |
}
|
45199 |
},
|
45200 |
+
"total_flos": 7.172916759861658e+18,
|
45201 |
"train_batch_size": 4,
|
45202 |
"trial_name": null,
|
45203 |
"trial_params": null
|