Training in progress, step 6630, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 377528296
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1aacc07d7a6d159824c1c0ee35e7479f76a36457267431cfb728017350d0a453
|
3 |
size 377528296
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 755217530
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e9218c11f6e8ac0dd6bfed5d5c509c181cb6ba9b71314da1beccfa02b35aac35
|
3 |
size 755217530
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4a9628c6d951d35b24588dd35ad0842eb2aa397f35a7d2e98b7ac2aa77eb0f12
|
3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2efcaa017136af3f1586f7ffd299c32a1aacae942ad78e9c83f4627abad725ab
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0
|
5 |
"eval_steps": 500,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -45178,6 +45178,1245 @@
|
|
45178 |
"learning_rate": 1.811746673659187e-07,
|
45179 |
"loss": 1.0547,
|
45180 |
"step": 6453
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
45181 |
}
|
45182 |
],
|
45183 |
"logging_steps": 1,
|
@@ -45192,12 +46431,12 @@
|
|
45192 |
"should_evaluate": false,
|
45193 |
"should_log": false,
|
45194 |
"should_save": true,
|
45195 |
-
"should_training_stop":
|
45196 |
},
|
45197 |
"attributes": {}
|
45198 |
}
|
45199 |
},
|
45200 |
-
"total_flos": 7.
|
45201 |
"train_batch_size": 4,
|
45202 |
"trial_name": null,
|
45203 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 1.0,
|
5 |
"eval_steps": 500,
|
6 |
+
"global_step": 6630,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
45178 |
"learning_rate": 1.811746673659187e-07,
|
45179 |
"loss": 1.0547,
|
45180 |
"step": 6453
|
45181 |
+
},
|
45182 |
+
{
|
45183 |
+
"epoch": 0.9734539969834087,
|
45184 |
+
"grad_norm": 2.080857992172241,
|
45185 |
+
"learning_rate": 1.7913449877619558e-07,
|
45186 |
+
"loss": 1.5701,
|
45187 |
+
"step": 6454
|
45188 |
+
},
|
45189 |
+
{
|
45190 |
+
"epoch": 0.973604826546003,
|
45191 |
+
"grad_norm": 1.761729121208191,
|
45192 |
+
"learning_rate": 1.7710586165564093e-07,
|
45193 |
+
"loss": 1.0911,
|
45194 |
+
"step": 6455
|
45195 |
+
},
|
45196 |
+
{
|
45197 |
+
"epoch": 0.9737556561085973,
|
45198 |
+
"grad_norm": 1.5841683149337769,
|
45199 |
+
"learning_rate": 1.750887564738124e-07,
|
45200 |
+
"loss": 0.8346,
|
45201 |
+
"step": 6456
|
45202 |
+
},
|
45203 |
+
{
|
45204 |
+
"epoch": 0.9739064856711915,
|
45205 |
+
"grad_norm": 2.080693483352661,
|
45206 |
+
"learning_rate": 1.7308318369757548e-07,
|
45207 |
+
"loss": 1.078,
|
45208 |
+
"step": 6457
|
45209 |
+
},
|
45210 |
+
{
|
45211 |
+
"epoch": 0.9740573152337858,
|
45212 |
+
"grad_norm": 1.6733683347702026,
|
45213 |
+
"learning_rate": 1.7108914379114772e-07,
|
45214 |
+
"loss": 0.9827,
|
45215 |
+
"step": 6458
|
45216 |
+
},
|
45217 |
+
{
|
45218 |
+
"epoch": 0.9742081447963801,
|
45219 |
+
"grad_norm": 1.8590120077133179,
|
45220 |
+
"learning_rate": 1.691066372160599e-07,
|
45221 |
+
"loss": 0.9611,
|
45222 |
+
"step": 6459
|
45223 |
+
},
|
45224 |
+
{
|
45225 |
+
"epoch": 0.9743589743589743,
|
45226 |
+
"grad_norm": 2.000999927520752,
|
45227 |
+
"learning_rate": 1.6713566443117833e-07,
|
45228 |
+
"loss": 1.2743,
|
45229 |
+
"step": 6460
|
45230 |
+
},
|
45231 |
+
{
|
45232 |
+
"epoch": 0.9745098039215686,
|
45233 |
+
"grad_norm": 1.6481130123138428,
|
45234 |
+
"learning_rate": 1.651762258927103e-07,
|
45235 |
+
"loss": 0.8385,
|
45236 |
+
"step": 6461
|
45237 |
+
},
|
45238 |
+
{
|
45239 |
+
"epoch": 0.9746606334841629,
|
45240 |
+
"grad_norm": 1.4775646924972534,
|
45241 |
+
"learning_rate": 1.6322832205417637e-07,
|
45242 |
+
"loss": 0.6422,
|
45243 |
+
"step": 6462
|
45244 |
+
},
|
45245 |
+
{
|
45246 |
+
"epoch": 0.9748114630467571,
|
45247 |
+
"grad_norm": 1.965220332145691,
|
45248 |
+
"learning_rate": 1.612919533664381e-07,
|
45249 |
+
"loss": 1.1127,
|
45250 |
+
"step": 6463
|
45251 |
+
},
|
45252 |
+
{
|
45253 |
+
"epoch": 0.9749622926093514,
|
45254 |
+
"grad_norm": 1.770487904548645,
|
45255 |
+
"learning_rate": 1.5936712027768695e-07,
|
45256 |
+
"loss": 0.9725,
|
45257 |
+
"step": 6464
|
45258 |
+
},
|
45259 |
+
{
|
45260 |
+
"epoch": 0.9751131221719457,
|
45261 |
+
"grad_norm": 1.8315738439559937,
|
45262 |
+
"learning_rate": 1.5745382323343883e-07,
|
45263 |
+
"loss": 0.9438,
|
45264 |
+
"step": 6465
|
45265 |
+
},
|
45266 |
+
{
|
45267 |
+
"epoch": 0.97526395173454,
|
45268 |
+
"grad_norm": 1.8729884624481201,
|
45269 |
+
"learning_rate": 1.5555206267655055e-07,
|
45270 |
+
"loss": 0.919,
|
45271 |
+
"step": 6466
|
45272 |
+
},
|
45273 |
+
{
|
45274 |
+
"epoch": 0.9754147812971342,
|
45275 |
+
"grad_norm": 2.053765058517456,
|
45276 |
+
"learning_rate": 1.5366183904719222e-07,
|
45277 |
+
"loss": 1.1069,
|
45278 |
+
"step": 6467
|
45279 |
+
},
|
45280 |
+
{
|
45281 |
+
"epoch": 0.9755656108597285,
|
45282 |
+
"grad_norm": 1.6839706897735596,
|
45283 |
+
"learning_rate": 1.5178315278287502e-07,
|
45284 |
+
"loss": 0.975,
|
45285 |
+
"step": 6468
|
45286 |
+
},
|
45287 |
+
{
|
45288 |
+
"epoch": 0.9757164404223228,
|
45289 |
+
"grad_norm": 1.818885087966919,
|
45290 |
+
"learning_rate": 1.4991600431843443e-07,
|
45291 |
+
"loss": 0.914,
|
45292 |
+
"step": 6469
|
45293 |
+
},
|
45294 |
+
{
|
45295 |
+
"epoch": 0.975867269984917,
|
45296 |
+
"grad_norm": 1.8105655908584595,
|
45297 |
+
"learning_rate": 1.48060394086047e-07,
|
45298 |
+
"loss": 0.9206,
|
45299 |
+
"step": 6470
|
45300 |
+
},
|
45301 |
+
{
|
45302 |
+
"epoch": 0.9760180995475113,
|
45303 |
+
"grad_norm": 2.1190874576568604,
|
45304 |
+
"learning_rate": 1.462163225151969e-07,
|
45305 |
+
"loss": 1.3382,
|
45306 |
+
"step": 6471
|
45307 |
+
},
|
45308 |
+
{
|
45309 |
+
"epoch": 0.9761689291101056,
|
45310 |
+
"grad_norm": 1.9208011627197266,
|
45311 |
+
"learning_rate": 1.4438379003272605e-07,
|
45312 |
+
"loss": 0.9099,
|
45313 |
+
"step": 6472
|
45314 |
+
},
|
45315 |
+
{
|
45316 |
+
"epoch": 0.9763197586726998,
|
45317 |
+
"grad_norm": 1.6620832681655884,
|
45318 |
+
"learning_rate": 1.4256279706277299e-07,
|
45319 |
+
"loss": 0.8732,
|
45320 |
+
"step": 6473
|
45321 |
+
},
|
45322 |
+
{
|
45323 |
+
"epoch": 0.9764705882352941,
|
45324 |
+
"grad_norm": 2.034463405609131,
|
45325 |
+
"learning_rate": 1.4075334402683937e-07,
|
45326 |
+
"loss": 0.9726,
|
45327 |
+
"step": 6474
|
45328 |
+
},
|
45329 |
+
{
|
45330 |
+
"epoch": 0.9766214177978884,
|
45331 |
+
"grad_norm": 1.8176192045211792,
|
45332 |
+
"learning_rate": 1.3895543134372358e-07,
|
45333 |
+
"loss": 1.0147,
|
45334 |
+
"step": 6475
|
45335 |
+
},
|
45336 |
+
{
|
45337 |
+
"epoch": 0.9767722473604826,
|
45338 |
+
"grad_norm": 1.8768322467803955,
|
45339 |
+
"learning_rate": 1.3716905942957602e-07,
|
45340 |
+
"loss": 0.9693,
|
45341 |
+
"step": 6476
|
45342 |
+
},
|
45343 |
+
{
|
45344 |
+
"epoch": 0.9769230769230769,
|
45345 |
+
"grad_norm": 1.8389067649841309,
|
45346 |
+
"learning_rate": 1.35394228697866e-07,
|
45347 |
+
"loss": 1.0102,
|
45348 |
+
"step": 6477
|
45349 |
+
},
|
45350 |
+
{
|
45351 |
+
"epoch": 0.9770739064856712,
|
45352 |
+
"grad_norm": 2.1269052028656006,
|
45353 |
+
"learning_rate": 1.3363093955939266e-07,
|
45354 |
+
"loss": 1.0953,
|
45355 |
+
"step": 6478
|
45356 |
+
},
|
45357 |
+
{
|
45358 |
+
"epoch": 0.9772247360482654,
|
45359 |
+
"grad_norm": 2.317656993865967,
|
45360 |
+
"learning_rate": 1.3187919242229063e-07,
|
45361 |
+
"loss": 1.18,
|
45362 |
+
"step": 6479
|
45363 |
+
},
|
45364 |
+
{
|
45365 |
+
"epoch": 0.9773755656108597,
|
45366 |
+
"grad_norm": 1.6912294626235962,
|
45367 |
+
"learning_rate": 1.3013898769200784e-07,
|
45368 |
+
"loss": 0.8096,
|
45369 |
+
"step": 6480
|
45370 |
+
},
|
45371 |
+
{
|
45372 |
+
"epoch": 0.977526395173454,
|
45373 |
+
"grad_norm": 1.921573281288147,
|
45374 |
+
"learning_rate": 1.2841032577133317e-07,
|
45375 |
+
"loss": 1.0757,
|
45376 |
+
"step": 6481
|
45377 |
+
},
|
45378 |
+
{
|
45379 |
+
"epoch": 0.9776772247360482,
|
45380 |
+
"grad_norm": 1.7856738567352295,
|
45381 |
+
"learning_rate": 1.2669320706037991e-07,
|
45382 |
+
"loss": 0.9704,
|
45383 |
+
"step": 6482
|
45384 |
+
},
|
45385 |
+
{
|
45386 |
+
"epoch": 0.9778280542986425,
|
45387 |
+
"grad_norm": 2.447734832763672,
|
45388 |
+
"learning_rate": 1.2498763195659125e-07,
|
45389 |
+
"loss": 1.2028,
|
45390 |
+
"step": 6483
|
45391 |
+
},
|
45392 |
+
{
|
45393 |
+
"epoch": 0.9779788838612368,
|
45394 |
+
"grad_norm": 1.6459358930587769,
|
45395 |
+
"learning_rate": 1.2329360085473472e-07,
|
45396 |
+
"loss": 0.7704,
|
45397 |
+
"step": 6484
|
45398 |
+
},
|
45399 |
+
{
|
45400 |
+
"epoch": 0.978129713423831,
|
45401 |
+
"grad_norm": 2.0553648471832275,
|
45402 |
+
"learning_rate": 1.2161111414691896e-07,
|
45403 |
+
"loss": 1.124,
|
45404 |
+
"step": 6485
|
45405 |
+
},
|
45406 |
+
{
|
45407 |
+
"epoch": 0.9782805429864253,
|
45408 |
+
"grad_norm": 1.5967365503311157,
|
45409 |
+
"learning_rate": 1.1994017222255461e-07,
|
45410 |
+
"loss": 0.8144,
|
45411 |
+
"step": 6486
|
45412 |
+
},
|
45413 |
+
{
|
45414 |
+
"epoch": 0.9784313725490196,
|
45415 |
+
"grad_norm": 1.9038810729980469,
|
45416 |
+
"learning_rate": 1.1828077546840455e-07,
|
45417 |
+
"loss": 0.9251,
|
45418 |
+
"step": 6487
|
45419 |
+
},
|
45420 |
+
{
|
45421 |
+
"epoch": 0.9785822021116138,
|
45422 |
+
"grad_norm": 1.8847215175628662,
|
45423 |
+
"learning_rate": 1.1663292426854489e-07,
|
45424 |
+
"loss": 0.9066,
|
45425 |
+
"step": 6488
|
45426 |
+
},
|
45427 |
+
{
|
45428 |
+
"epoch": 0.9787330316742081,
|
45429 |
+
"grad_norm": 2.5748424530029297,
|
45430 |
+
"learning_rate": 1.1499661900439274e-07,
|
45431 |
+
"loss": 1.0472,
|
45432 |
+
"step": 6489
|
45433 |
+
},
|
45434 |
+
{
|
45435 |
+
"epoch": 0.9788838612368024,
|
45436 |
+
"grad_norm": 2.0933051109313965,
|
45437 |
+
"learning_rate": 1.1337186005467848e-07,
|
45438 |
+
"loss": 1.2256,
|
45439 |
+
"step": 6490
|
45440 |
+
},
|
45441 |
+
{
|
45442 |
+
"epoch": 0.9790346907993966,
|
45443 |
+
"grad_norm": 2.030015707015991,
|
45444 |
+
"learning_rate": 1.1175864779547351e-07,
|
45445 |
+
"loss": 0.8935,
|
45446 |
+
"step": 6491
|
45447 |
+
},
|
45448 |
+
{
|
45449 |
+
"epoch": 0.9791855203619909,
|
45450 |
+
"grad_norm": 2.0891942977905273,
|
45451 |
+
"learning_rate": 1.101569826001625e-07,
|
45452 |
+
"loss": 1.0113,
|
45453 |
+
"step": 6492
|
45454 |
+
},
|
45455 |
+
{
|
45456 |
+
"epoch": 0.9793363499245852,
|
45457 |
+
"grad_norm": 2.1114308834075928,
|
45458 |
+
"learning_rate": 1.0856686483946555e-07,
|
45459 |
+
"loss": 1.0275,
|
45460 |
+
"step": 6493
|
45461 |
+
},
|
45462 |
+
{
|
45463 |
+
"epoch": 0.9794871794871794,
|
45464 |
+
"grad_norm": 2.236339569091797,
|
45465 |
+
"learning_rate": 1.0698829488143269e-07,
|
45466 |
+
"loss": 1.2783,
|
45467 |
+
"step": 6494
|
45468 |
+
},
|
45469 |
+
{
|
45470 |
+
"epoch": 0.9796380090497737,
|
45471 |
+
"grad_norm": 2.22591233253479,
|
45472 |
+
"learning_rate": 1.0542127309143834e-07,
|
45473 |
+
"loss": 1.1781,
|
45474 |
+
"step": 6495
|
45475 |
+
},
|
45476 |
+
{
|
45477 |
+
"epoch": 0.979788838612368,
|
45478 |
+
"grad_norm": 1.9555222988128662,
|
45479 |
+
"learning_rate": 1.0386579983217571e-07,
|
45480 |
+
"loss": 0.8283,
|
45481 |
+
"step": 6496
|
45482 |
+
},
|
45483 |
+
{
|
45484 |
+
"epoch": 0.9799396681749623,
|
45485 |
+
"grad_norm": 1.7273701429367065,
|
45486 |
+
"learning_rate": 1.02321875463679e-07,
|
45487 |
+
"loss": 0.6755,
|
45488 |
+
"step": 6497
|
45489 |
+
},
|
45490 |
+
{
|
45491 |
+
"epoch": 0.9800904977375565,
|
45492 |
+
"grad_norm": 1.6880528926849365,
|
45493 |
+
"learning_rate": 1.0078950034330681e-07,
|
45494 |
+
"loss": 0.7478,
|
45495 |
+
"step": 6498
|
45496 |
+
},
|
45497 |
+
{
|
45498 |
+
"epoch": 0.9802413273001508,
|
45499 |
+
"grad_norm": 1.6948614120483398,
|
45500 |
+
"learning_rate": 9.926867482573099e-08,
|
45501 |
+
"loss": 0.8543,
|
45502 |
+
"step": 6499
|
45503 |
+
},
|
45504 |
+
{
|
45505 |
+
"epoch": 0.9803921568627451,
|
45506 |
+
"grad_norm": 1.8066236972808838,
|
45507 |
+
"learning_rate": 9.77593992629644e-08,
|
45508 |
+
"loss": 0.7576,
|
45509 |
+
"step": 6500
|
45510 |
+
},
|
45511 |
+
{
|
45512 |
+
"epoch": 0.9805429864253393,
|
45513 |
+
"grad_norm": 1.5170810222625732,
|
45514 |
+
"learning_rate": 9.626167400433872e-08,
|
45515 |
+
"loss": 0.8879,
|
45516 |
+
"step": 6501
|
45517 |
+
},
|
45518 |
+
{
|
45519 |
+
"epoch": 0.9806938159879336,
|
45520 |
+
"grad_norm": 1.8861565589904785,
|
45521 |
+
"learning_rate": 9.477549939652108e-08,
|
45522 |
+
"loss": 1.2489,
|
45523 |
+
"step": 6502
|
45524 |
+
},
|
45525 |
+
{
|
45526 |
+
"epoch": 0.9808446455505279,
|
45527 |
+
"grad_norm": 2.005974054336548,
|
45528 |
+
"learning_rate": 9.330087578349745e-08,
|
45529 |
+
"loss": 1.0192,
|
45530 |
+
"step": 6503
|
45531 |
+
},
|
45532 |
+
{
|
45533 |
+
"epoch": 0.9809954751131221,
|
45534 |
+
"grad_norm": 1.9359047412872314,
|
45535 |
+
"learning_rate": 9.183780350657812e-08,
|
45536 |
+
"loss": 1.1492,
|
45537 |
+
"step": 6504
|
45538 |
+
},
|
45539 |
+
{
|
45540 |
+
"epoch": 0.9811463046757164,
|
45541 |
+
"grad_norm": 1.7721647024154663,
|
45542 |
+
"learning_rate": 9.038628290440887e-08,
|
45543 |
+
"loss": 0.941,
|
45544 |
+
"step": 6505
|
45545 |
+
},
|
45546 |
+
{
|
45547 |
+
"epoch": 0.9812971342383107,
|
45548 |
+
"grad_norm": 1.5403116941452026,
|
45549 |
+
"learning_rate": 8.89463143129543e-08,
|
45550 |
+
"loss": 0.8233,
|
45551 |
+
"step": 6506
|
45552 |
+
},
|
45553 |
+
{
|
45554 |
+
"epoch": 0.9814479638009049,
|
45555 |
+
"grad_norm": 1.6215004920959473,
|
45556 |
+
"learning_rate": 8.751789806550892e-08,
|
45557 |
+
"loss": 0.7563,
|
45558 |
+
"step": 6507
|
45559 |
+
},
|
45560 |
+
{
|
45561 |
+
"epoch": 0.9815987933634992,
|
45562 |
+
"grad_norm": 2.063256025314331,
|
45563 |
+
"learning_rate": 8.610103449268603e-08,
|
45564 |
+
"loss": 1.3814,
|
45565 |
+
"step": 6508
|
45566 |
+
},
|
45567 |
+
{
|
45568 |
+
"epoch": 0.9817496229260935,
|
45569 |
+
"grad_norm": 1.958470344543457,
|
45570 |
+
"learning_rate": 8.469572392243996e-08,
|
45571 |
+
"loss": 1.3061,
|
45572 |
+
"step": 6509
|
45573 |
+
},
|
45574 |
+
{
|
45575 |
+
"epoch": 0.9819004524886877,
|
45576 |
+
"grad_norm": 1.8253540992736816,
|
45577 |
+
"learning_rate": 8.330196668003831e-08,
|
45578 |
+
"loss": 1.1178,
|
45579 |
+
"step": 6510
|
45580 |
+
},
|
45581 |
+
{
|
45582 |
+
"epoch": 0.982051282051282,
|
45583 |
+
"grad_norm": 2.160386323928833,
|
45584 |
+
"learning_rate": 8.191976308807858e-08,
|
45585 |
+
"loss": 1.3507,
|
45586 |
+
"step": 6511
|
45587 |
+
},
|
45588 |
+
{
|
45589 |
+
"epoch": 0.9822021116138763,
|
45590 |
+
"grad_norm": 1.7865500450134277,
|
45591 |
+
"learning_rate": 8.054911346647709e-08,
|
45592 |
+
"loss": 1.0007,
|
45593 |
+
"step": 6512
|
45594 |
+
},
|
45595 |
+
{
|
45596 |
+
"epoch": 0.9823529411764705,
|
45597 |
+
"grad_norm": 1.9779447317123413,
|
45598 |
+
"learning_rate": 7.919001813249671e-08,
|
45599 |
+
"loss": 1.1617,
|
45600 |
+
"step": 6513
|
45601 |
+
},
|
45602 |
+
{
|
45603 |
+
"epoch": 0.9825037707390648,
|
45604 |
+
"grad_norm": 1.7435057163238525,
|
45605 |
+
"learning_rate": 7.784247740069694e-08,
|
45606 |
+
"loss": 0.8858,
|
45607 |
+
"step": 6514
|
45608 |
+
},
|
45609 |
+
{
|
45610 |
+
"epoch": 0.9826546003016591,
|
45611 |
+
"grad_norm": 1.9248754978179932,
|
45612 |
+
"learning_rate": 7.650649158298384e-08,
|
45613 |
+
"loss": 1.0385,
|
45614 |
+
"step": 6515
|
45615 |
+
},
|
45616 |
+
{
|
45617 |
+
"epoch": 0.9828054298642533,
|
45618 |
+
"grad_norm": 1.8311914205551147,
|
45619 |
+
"learning_rate": 7.518206098858782e-08,
|
45620 |
+
"loss": 0.8934,
|
45621 |
+
"step": 6516
|
45622 |
+
},
|
45623 |
+
{
|
45624 |
+
"epoch": 0.9829562594268476,
|
45625 |
+
"grad_norm": 1.7870116233825684,
|
45626 |
+
"learning_rate": 7.386918592405256e-08,
|
45627 |
+
"loss": 1.0486,
|
45628 |
+
"step": 6517
|
45629 |
+
},
|
45630 |
+
{
|
45631 |
+
"epoch": 0.9831070889894419,
|
45632 |
+
"grad_norm": 2.1831729412078857,
|
45633 |
+
"learning_rate": 7.256786669325721e-08,
|
45634 |
+
"loss": 1.2173,
|
45635 |
+
"step": 6518
|
45636 |
+
},
|
45637 |
+
{
|
45638 |
+
"epoch": 0.9832579185520361,
|
45639 |
+
"grad_norm": 1.9115618467330933,
|
45640 |
+
"learning_rate": 7.127810359740527e-08,
|
45641 |
+
"loss": 0.8471,
|
45642 |
+
"step": 6519
|
45643 |
+
},
|
45644 |
+
{
|
45645 |
+
"epoch": 0.9834087481146304,
|
45646 |
+
"grad_norm": 2.1784827709198,
|
45647 |
+
"learning_rate": 6.999989693501908e-08,
|
45648 |
+
"loss": 1.3449,
|
45649 |
+
"step": 6520
|
45650 |
+
},
|
45651 |
+
{
|
45652 |
+
"epoch": 0.9835595776772247,
|
45653 |
+
"grad_norm": 1.8847562074661255,
|
45654 |
+
"learning_rate": 6.873324700195083e-08,
|
45655 |
+
"loss": 1.0211,
|
45656 |
+
"step": 6521
|
45657 |
+
},
|
45658 |
+
{
|
45659 |
+
"epoch": 0.983710407239819,
|
45660 |
+
"grad_norm": 2.0372416973114014,
|
45661 |
+
"learning_rate": 6.74781540913827e-08,
|
45662 |
+
"loss": 1.1225,
|
45663 |
+
"step": 6522
|
45664 |
+
},
|
45665 |
+
{
|
45666 |
+
"epoch": 0.9838612368024132,
|
45667 |
+
"grad_norm": 1.8074803352355957,
|
45668 |
+
"learning_rate": 6.623461849381563e-08,
|
45669 |
+
"loss": 0.942,
|
45670 |
+
"step": 6523
|
45671 |
+
},
|
45672 |
+
{
|
45673 |
+
"epoch": 0.9840120663650075,
|
45674 |
+
"grad_norm": 1.759004831314087,
|
45675 |
+
"learning_rate": 6.50026404970694e-08,
|
45676 |
+
"loss": 1.0119,
|
45677 |
+
"step": 6524
|
45678 |
+
},
|
45679 |
+
{
|
45680 |
+
"epoch": 0.9841628959276018,
|
45681 |
+
"grad_norm": 1.897386074066162,
|
45682 |
+
"learning_rate": 6.378222038630477e-08,
|
45683 |
+
"loss": 1.1226,
|
45684 |
+
"step": 6525
|
45685 |
+
},
|
45686 |
+
{
|
45687 |
+
"epoch": 0.984313725490196,
|
45688 |
+
"grad_norm": 1.8542094230651855,
|
45689 |
+
"learning_rate": 6.257335844399581e-08,
|
45690 |
+
"loss": 0.9392,
|
45691 |
+
"step": 6526
|
45692 |
+
},
|
45693 |
+
{
|
45694 |
+
"epoch": 0.9844645550527904,
|
45695 |
+
"grad_norm": 1.5449867248535156,
|
45696 |
+
"learning_rate": 6.137605494994092e-08,
|
45697 |
+
"loss": 0.6639,
|
45698 |
+
"step": 6527
|
45699 |
+
},
|
45700 |
+
{
|
45701 |
+
"epoch": 0.9846153846153847,
|
45702 |
+
"grad_norm": 1.8442318439483643,
|
45703 |
+
"learning_rate": 6.019031018126841e-08,
|
45704 |
+
"loss": 0.9855,
|
45705 |
+
"step": 6528
|
45706 |
+
},
|
45707 |
+
{
|
45708 |
+
"epoch": 0.9847662141779789,
|
45709 |
+
"grad_norm": 2.0847725868225098,
|
45710 |
+
"learning_rate": 5.9016124412430987e-08,
|
45711 |
+
"loss": 1.1598,
|
45712 |
+
"step": 6529
|
45713 |
+
},
|
45714 |
+
{
|
45715 |
+
"epoch": 0.9849170437405732,
|
45716 |
+
"grad_norm": 1.7849246263504028,
|
45717 |
+
"learning_rate": 5.785349791520012e-08,
|
45718 |
+
"loss": 0.9352,
|
45719 |
+
"step": 6530
|
45720 |
+
},
|
45721 |
+
{
|
45722 |
+
"epoch": 0.9850678733031675,
|
45723 |
+
"grad_norm": 1.9082951545715332,
|
45724 |
+
"learning_rate": 5.670243095867722e-08,
|
45725 |
+
"loss": 0.911,
|
45726 |
+
"step": 6531
|
45727 |
+
},
|
45728 |
+
{
|
45729 |
+
"epoch": 0.9852187028657617,
|
45730 |
+
"grad_norm": 1.6478004455566406,
|
45731 |
+
"learning_rate": 5.5562923809293624e-08,
|
45732 |
+
"loss": 0.7075,
|
45733 |
+
"step": 6532
|
45734 |
+
},
|
45735 |
+
{
|
45736 |
+
"epoch": 0.985369532428356,
|
45737 |
+
"grad_norm": 2.0368950366973877,
|
45738 |
+
"learning_rate": 5.4434976730788346e-08,
|
45739 |
+
"loss": 1.2577,
|
45740 |
+
"step": 6533
|
45741 |
+
},
|
45742 |
+
{
|
45743 |
+
"epoch": 0.9855203619909503,
|
45744 |
+
"grad_norm": 2.204418897628784,
|
45745 |
+
"learning_rate": 5.331858998423589e-08,
|
45746 |
+
"loss": 1.4991,
|
45747 |
+
"step": 6534
|
45748 |
+
},
|
45749 |
+
{
|
45750 |
+
"epoch": 0.9856711915535445,
|
45751 |
+
"grad_norm": 1.966247797012329,
|
45752 |
+
"learning_rate": 5.221376382803511e-08,
|
45753 |
+
"loss": 1.0373,
|
45754 |
+
"step": 6535
|
45755 |
+
},
|
45756 |
+
{
|
45757 |
+
"epoch": 0.9858220211161388,
|
45758 |
+
"grad_norm": 1.7758862972259521,
|
45759 |
+
"learning_rate": 5.1120498517914785e-08,
|
45760 |
+
"loss": 0.9066,
|
45761 |
+
"step": 6536
|
45762 |
+
},
|
45763 |
+
{
|
45764 |
+
"epoch": 0.9859728506787331,
|
45765 |
+
"grad_norm": 1.944427251815796,
|
45766 |
+
"learning_rate": 5.0038794306905834e-08,
|
45767 |
+
"loss": 1.1688,
|
45768 |
+
"step": 6537
|
45769 |
+
},
|
45770 |
+
{
|
45771 |
+
"epoch": 0.9861236802413273,
|
45772 |
+
"grad_norm": 1.944295048713684,
|
45773 |
+
"learning_rate": 4.896865144539131e-08,
|
45774 |
+
"loss": 0.981,
|
45775 |
+
"step": 6538
|
45776 |
+
},
|
45777 |
+
{
|
45778 |
+
"epoch": 0.9862745098039216,
|
45779 |
+
"grad_norm": 1.916852593421936,
|
45780 |
+
"learning_rate": 4.7910070181061974e-08,
|
45781 |
+
"loss": 1.1284,
|
45782 |
+
"step": 6539
|
45783 |
+
},
|
45784 |
+
{
|
45785 |
+
"epoch": 0.9864253393665159,
|
45786 |
+
"grad_norm": 2.07956862449646,
|
45787 |
+
"learning_rate": 4.686305075892738e-08,
|
45788 |
+
"loss": 1.198,
|
45789 |
+
"step": 6540
|
45790 |
+
},
|
45791 |
+
{
|
45792 |
+
"epoch": 0.9865761689291102,
|
45793 |
+
"grad_norm": 2.1623141765594482,
|
45794 |
+
"learning_rate": 4.5827593421338134e-08,
|
45795 |
+
"loss": 1.3789,
|
45796 |
+
"step": 6541
|
45797 |
+
},
|
45798 |
+
{
|
45799 |
+
"epoch": 0.9867269984917044,
|
45800 |
+
"grad_norm": 2.1320431232452393,
|
45801 |
+
"learning_rate": 4.480369840795806e-08,
|
45802 |
+
"loss": 1.2926,
|
45803 |
+
"step": 6542
|
45804 |
+
},
|
45805 |
+
{
|
45806 |
+
"epoch": 0.9868778280542987,
|
45807 |
+
"grad_norm": 2.1627719402313232,
|
45808 |
+
"learning_rate": 4.379136595577537e-08,
|
45809 |
+
"loss": 1.1043,
|
45810 |
+
"step": 6543
|
45811 |
+
},
|
45812 |
+
{
|
45813 |
+
"epoch": 0.987028657616893,
|
45814 |
+
"grad_norm": 2.2448055744171143,
|
45815 |
+
"learning_rate": 4.2790596299102646e-08,
|
45816 |
+
"loss": 1.14,
|
45817 |
+
"step": 6544
|
45818 |
+
},
|
45819 |
+
{
|
45820 |
+
"epoch": 0.9871794871794872,
|
45821 |
+
"grad_norm": 1.8780425786972046,
|
45822 |
+
"learning_rate": 4.1801389669576805e-08,
|
45823 |
+
"loss": 0.9128,
|
45824 |
+
"step": 6545
|
45825 |
+
},
|
45826 |
+
{
|
45827 |
+
"epoch": 0.9873303167420815,
|
45828 |
+
"grad_norm": 2.0032410621643066,
|
45829 |
+
"learning_rate": 4.082374629615915e-08,
|
45830 |
+
"loss": 0.9716,
|
45831 |
+
"step": 6546
|
45832 |
+
},
|
45833 |
+
{
|
45834 |
+
"epoch": 0.9874811463046758,
|
45835 |
+
"grad_norm": 1.782638669013977,
|
45836 |
+
"learning_rate": 3.985766640513533e-08,
|
45837 |
+
"loss": 0.8816,
|
45838 |
+
"step": 6547
|
45839 |
+
},
|
45840 |
+
{
|
45841 |
+
"epoch": 0.98763197586727,
|
45842 |
+
"grad_norm": 1.7118865251541138,
|
45843 |
+
"learning_rate": 3.890315022010982e-08,
|
45844 |
+
"loss": 0.7538,
|
45845 |
+
"step": 6548
|
45846 |
+
},
|
45847 |
+
{
|
45848 |
+
"epoch": 0.9877828054298643,
|
45849 |
+
"grad_norm": 1.3487775325775146,
|
45850 |
+
"learning_rate": 3.7960197962011447e-08,
|
45851 |
+
"loss": 0.5478,
|
45852 |
+
"step": 6549
|
45853 |
+
},
|
45854 |
+
{
|
45855 |
+
"epoch": 0.9879336349924586,
|
45856 |
+
"grad_norm": 1.5120551586151123,
|
45857 |
+
"learning_rate": 3.7028809849098955e-08,
|
45858 |
+
"loss": 0.578,
|
45859 |
+
"step": 6550
|
45860 |
+
},
|
45861 |
+
{
|
45862 |
+
"epoch": 0.9880844645550528,
|
45863 |
+
"grad_norm": 1.6499603986740112,
|
45864 |
+
"learning_rate": 3.610898609694991e-08,
|
45865 |
+
"loss": 0.9084,
|
45866 |
+
"step": 6551
|
45867 |
+
},
|
45868 |
+
{
|
45869 |
+
"epoch": 0.9882352941176471,
|
45870 |
+
"grad_norm": 2.073840379714966,
|
45871 |
+
"learning_rate": 3.520072691846621e-08,
|
45872 |
+
"loss": 1.3946,
|
45873 |
+
"step": 6552
|
45874 |
+
},
|
45875 |
+
{
|
45876 |
+
"epoch": 0.9883861236802414,
|
45877 |
+
"grad_norm": 2.301109552383423,
|
45878 |
+
"learning_rate": 3.43040325238686e-08,
|
45879 |
+
"loss": 1.5888,
|
45880 |
+
"step": 6553
|
45881 |
+
},
|
45882 |
+
{
|
45883 |
+
"epoch": 0.9885369532428356,
|
45884 |
+
"grad_norm": 2.0262253284454346,
|
45885 |
+
"learning_rate": 3.341890312070772e-08,
|
45886 |
+
"loss": 1.198,
|
45887 |
+
"step": 6554
|
45888 |
+
},
|
45889 |
+
{
|
45890 |
+
"epoch": 0.9886877828054299,
|
45891 |
+
"grad_norm": 1.9711953401565552,
|
45892 |
+
"learning_rate": 3.254533891385303e-08,
|
45893 |
+
"loss": 0.998,
|
45894 |
+
"step": 6555
|
45895 |
+
},
|
45896 |
+
{
|
45897 |
+
"epoch": 0.9888386123680242,
|
45898 |
+
"grad_norm": 1.9102327823638916,
|
45899 |
+
"learning_rate": 3.168334010549834e-08,
|
45900 |
+
"loss": 1.16,
|
45901 |
+
"step": 6556
|
45902 |
+
},
|
45903 |
+
{
|
45904 |
+
"epoch": 0.9889894419306184,
|
45905 |
+
"grad_norm": 1.8268523216247559,
|
45906 |
+
"learning_rate": 3.083290689516183e-08,
|
45907 |
+
"loss": 1.085,
|
45908 |
+
"step": 6557
|
45909 |
+
},
|
45910 |
+
{
|
45911 |
+
"epoch": 0.9891402714932127,
|
45912 |
+
"grad_norm": 1.8766851425170898,
|
45913 |
+
"learning_rate": 2.999403947968049e-08,
|
45914 |
+
"loss": 1.0967,
|
45915 |
+
"step": 6558
|
45916 |
+
},
|
45917 |
+
{
|
45918 |
+
"epoch": 0.989291101055807,
|
45919 |
+
"grad_norm": 1.867449164390564,
|
45920 |
+
"learning_rate": 2.9166738053221232e-08,
|
45921 |
+
"loss": 0.9528,
|
45922 |
+
"step": 6559
|
45923 |
+
},
|
45924 |
+
{
|
45925 |
+
"epoch": 0.9894419306184012,
|
45926 |
+
"grad_norm": 2.329340696334839,
|
45927 |
+
"learning_rate": 2.8351002807269767e-08,
|
45928 |
+
"loss": 1.5427,
|
45929 |
+
"step": 6560
|
45930 |
+
},
|
45931 |
+
{
|
45932 |
+
"epoch": 0.9895927601809955,
|
45933 |
+
"grad_norm": 1.7027629613876343,
|
45934 |
+
"learning_rate": 2.7546833930636173e-08,
|
45935 |
+
"loss": 0.8837,
|
45936 |
+
"step": 6561
|
45937 |
+
},
|
45938 |
+
{
|
45939 |
+
"epoch": 0.9897435897435898,
|
45940 |
+
"grad_norm": 1.9053348302841187,
|
45941 |
+
"learning_rate": 2.6754231609449344e-08,
|
45942 |
+
"loss": 1.0772,
|
45943 |
+
"step": 6562
|
45944 |
+
},
|
45945 |
+
{
|
45946 |
+
"epoch": 0.989894419306184,
|
45947 |
+
"grad_norm": 1.7392765283584595,
|
45948 |
+
"learning_rate": 2.5973196027162527e-08,
|
45949 |
+
"loss": 0.8626,
|
45950 |
+
"step": 6563
|
45951 |
+
},
|
45952 |
+
{
|
45953 |
+
"epoch": 0.9900452488687783,
|
45954 |
+
"grad_norm": 1.7653361558914185,
|
45955 |
+
"learning_rate": 2.5203727364558892e-08,
|
45956 |
+
"loss": 1.1656,
|
45957 |
+
"step": 6564
|
45958 |
+
},
|
45959 |
+
{
|
45960 |
+
"epoch": 0.9901960784313726,
|
45961 |
+
"grad_norm": 2.0128557682037354,
|
45962 |
+
"learning_rate": 2.4445825799729317e-08,
|
45963 |
+
"loss": 1.1097,
|
45964 |
+
"step": 6565
|
45965 |
+
},
|
45966 |
+
{
|
45967 |
+
"epoch": 0.9903469079939669,
|
45968 |
+
"grad_norm": 1.5651352405548096,
|
45969 |
+
"learning_rate": 2.3699491508105687e-08,
|
45970 |
+
"loss": 0.7791,
|
45971 |
+
"step": 6566
|
45972 |
+
},
|
45973 |
+
{
|
45974 |
+
"epoch": 0.9904977375565611,
|
45975 |
+
"grad_norm": 1.7918542623519897,
|
45976 |
+
"learning_rate": 2.2964724662433156e-08,
|
45977 |
+
"loss": 1.0578,
|
45978 |
+
"step": 6567
|
45979 |
+
},
|
45980 |
+
{
|
45981 |
+
"epoch": 0.9906485671191554,
|
45982 |
+
"grad_norm": 2.0644359588623047,
|
45983 |
+
"learning_rate": 2.224152543277569e-08,
|
45984 |
+
"loss": 1.1738,
|
45985 |
+
"step": 6568
|
45986 |
+
},
|
45987 |
+
{
|
45988 |
+
"epoch": 0.9907993966817497,
|
45989 |
+
"grad_norm": 1.8133044242858887,
|
45990 |
+
"learning_rate": 2.152989398652161e-08,
|
45991 |
+
"loss": 1.0495,
|
45992 |
+
"step": 6569
|
45993 |
+
},
|
45994 |
+
{
|
45995 |
+
"epoch": 0.9909502262443439,
|
45996 |
+
"grad_norm": 1.6463404893875122,
|
45997 |
+
"learning_rate": 2.0829830488389156e-08,
|
45998 |
+
"loss": 0.726,
|
45999 |
+
"step": 6570
|
46000 |
+
},
|
46001 |
+
{
|
46002 |
+
"epoch": 0.9911010558069382,
|
46003 |
+
"grad_norm": 1.8328197002410889,
|
46004 |
+
"learning_rate": 2.014133510041538e-08,
|
46005 |
+
"loss": 0.976,
|
46006 |
+
"step": 6571
|
46007 |
+
},
|
46008 |
+
{
|
46009 |
+
"epoch": 0.9912518853695325,
|
46010 |
+
"grad_norm": 1.6490834951400757,
|
46011 |
+
"learning_rate": 1.9464407981956146e-08,
|
46012 |
+
"loss": 0.8504,
|
46013 |
+
"step": 6572
|
46014 |
+
},
|
46015 |
+
{
|
46016 |
+
"epoch": 0.9914027149321267,
|
46017 |
+
"grad_norm": 1.933363914489746,
|
46018 |
+
"learning_rate": 1.879904928969167e-08,
|
46019 |
+
"loss": 0.9774,
|
46020 |
+
"step": 6573
|
46021 |
+
},
|
46022 |
+
{
|
46023 |
+
"epoch": 0.991553544494721,
|
46024 |
+
"grad_norm": 1.8430428504943848,
|
46025 |
+
"learning_rate": 1.8145259177621e-08,
|
46026 |
+
"loss": 0.8791,
|
46027 |
+
"step": 6574
|
46028 |
+
},
|
46029 |
+
{
|
46030 |
+
"epoch": 0.9917043740573153,
|
46031 |
+
"grad_norm": 2.158477544784546,
|
46032 |
+
"learning_rate": 1.7503037797078626e-08,
|
46033 |
+
"loss": 1.3405,
|
46034 |
+
"step": 6575
|
46035 |
+
},
|
46036 |
+
{
|
46037 |
+
"epoch": 0.9918552036199095,
|
46038 |
+
"grad_norm": 2.0298755168914795,
|
46039 |
+
"learning_rate": 1.687238529670121e-08,
|
46040 |
+
"loss": 1.2048,
|
46041 |
+
"step": 6576
|
46042 |
+
},
|
46043 |
+
{
|
46044 |
+
"epoch": 0.9920060331825038,
|
46045 |
+
"grad_norm": 2.078059196472168,
|
46046 |
+
"learning_rate": 1.6253301822466428e-08,
|
46047 |
+
"loss": 1.1711,
|
46048 |
+
"step": 6577
|
46049 |
+
},
|
46050 |
+
{
|
46051 |
+
"epoch": 0.9921568627450981,
|
46052 |
+
"grad_norm": 1.9220235347747803,
|
46053 |
+
"learning_rate": 1.5645787517670762e-08,
|
46054 |
+
"loss": 0.8961,
|
46055 |
+
"step": 6578
|
46056 |
+
},
|
46057 |
+
{
|
46058 |
+
"epoch": 0.9923076923076923,
|
46059 |
+
"grad_norm": 2.269637107849121,
|
46060 |
+
"learning_rate": 1.5049842522918412e-08,
|
46061 |
+
"loss": 1.4525,
|
46062 |
+
"step": 6579
|
46063 |
+
},
|
46064 |
+
{
|
46065 |
+
"epoch": 0.9924585218702866,
|
46066 |
+
"grad_norm": 2.0240731239318848,
|
46067 |
+
"learning_rate": 1.4465466976149034e-08,
|
46068 |
+
"loss": 1.2533,
|
46069 |
+
"step": 6580
|
46070 |
+
},
|
46071 |
+
{
|
46072 |
+
"epoch": 0.9926093514328809,
|
46073 |
+
"grad_norm": 2.1804757118225098,
|
46074 |
+
"learning_rate": 1.38926610126211e-08,
|
46075 |
+
"loss": 1.1277,
|
46076 |
+
"step": 6581
|
46077 |
+
},
|
46078 |
+
{
|
46079 |
+
"epoch": 0.9927601809954751,
|
46080 |
+
"grad_norm": 2.2509958744049072,
|
46081 |
+
"learning_rate": 1.3331424764922994e-08,
|
46082 |
+
"loss": 1.1611,
|
46083 |
+
"step": 6582
|
46084 |
+
},
|
46085 |
+
{
|
46086 |
+
"epoch": 0.9929110105580694,
|
46087 |
+
"grad_norm": 1.9428391456604004,
|
46088 |
+
"learning_rate": 1.2781758362945262e-08,
|
46089 |
+
"loss": 1.0481,
|
46090 |
+
"step": 6583
|
46091 |
+
},
|
46092 |
+
{
|
46093 |
+
"epoch": 0.9930618401206637,
|
46094 |
+
"grad_norm": 1.855837106704712,
|
46095 |
+
"learning_rate": 1.224366193392501e-08,
|
46096 |
+
"loss": 0.8939,
|
46097 |
+
"step": 6584
|
46098 |
+
},
|
46099 |
+
{
|
46100 |
+
"epoch": 0.9932126696832579,
|
46101 |
+
"grad_norm": 1.8608096837997437,
|
46102 |
+
"learning_rate": 1.1717135602401507e-08,
|
46103 |
+
"loss": 0.9874,
|
46104 |
+
"step": 6585
|
46105 |
+
},
|
46106 |
+
{
|
46107 |
+
"epoch": 0.9933634992458522,
|
46108 |
+
"grad_norm": 1.6831547021865845,
|
46109 |
+
"learning_rate": 1.1202179490243937e-08,
|
46110 |
+
"loss": 0.9085,
|
46111 |
+
"step": 6586
|
46112 |
+
},
|
46113 |
+
{
|
46114 |
+
"epoch": 0.9935143288084465,
|
46115 |
+
"grad_norm": 2.1930525302886963,
|
46116 |
+
"learning_rate": 1.069879371664584e-08,
|
46117 |
+
"loss": 1.0266,
|
46118 |
+
"step": 6587
|
46119 |
+
},
|
46120 |
+
{
|
46121 |
+
"epoch": 0.9936651583710407,
|
46122 |
+
"grad_norm": 1.843945026397705,
|
46123 |
+
"learning_rate": 1.0206978398119572e-08,
|
46124 |
+
"loss": 1.0207,
|
46125 |
+
"step": 6588
|
46126 |
+
},
|
46127 |
+
{
|
46128 |
+
"epoch": 0.993815987933635,
|
46129 |
+
"grad_norm": 1.9906690120697021,
|
46130 |
+
"learning_rate": 9.72673364850185e-09,
|
46131 |
+
"loss": 1.1523,
|
46132 |
+
"step": 6589
|
46133 |
+
},
|
46134 |
+
{
|
46135 |
+
"epoch": 0.9939668174962293,
|
46136 |
+
"grad_norm": 2.588918685913086,
|
46137 |
+
"learning_rate": 9.258059578948209e-09,
|
46138 |
+
"loss": 1.4171,
|
46139 |
+
"step": 6590
|
46140 |
+
},
|
46141 |
+
{
|
46142 |
+
"epoch": 0.9941176470588236,
|
46143 |
+
"grad_norm": 2.232747793197632,
|
46144 |
+
"learning_rate": 8.800956297932983e-09,
|
46145 |
+
"loss": 1.1359,
|
46146 |
+
"step": 6591
|
46147 |
+
},
|
46148 |
+
{
|
46149 |
+
"epoch": 0.9942684766214178,
|
46150 |
+
"grad_norm": 1.9260307550430298,
|
46151 |
+
"learning_rate": 8.35542391126598e-09,
|
46152 |
+
"loss": 0.8841,
|
46153 |
+
"step": 6592
|
46154 |
+
},
|
46155 |
+
{
|
46156 |
+
"epoch": 0.9944193061840121,
|
46157 |
+
"grad_norm": 2.2145347595214844,
|
46158 |
+
"learning_rate": 7.921462522059164e-09,
|
46159 |
+
"loss": 1.0699,
|
46160 |
+
"step": 6593
|
46161 |
+
},
|
46162 |
+
{
|
46163 |
+
"epoch": 0.9945701357466064,
|
46164 |
+
"grad_norm": 2.3072757720947266,
|
46165 |
+
"learning_rate": 7.499072230765514e-09,
|
46166 |
+
"loss": 1.0223,
|
46167 |
+
"step": 6594
|
46168 |
+
},
|
46169 |
+
{
|
46170 |
+
"epoch": 0.9947209653092006,
|
46171 |
+
"grad_norm": 1.863654613494873,
|
46172 |
+
"learning_rate": 7.088253135145717e-09,
|
46173 |
+
"loss": 0.9445,
|
46174 |
+
"step": 6595
|
46175 |
+
},
|
46176 |
+
{
|
46177 |
+
"epoch": 0.9948717948717949,
|
46178 |
+
"grad_norm": 2.0308523178100586,
|
46179 |
+
"learning_rate": 6.6890053302848255e-09,
|
46180 |
+
"loss": 1.0649,
|
46181 |
+
"step": 6596
|
46182 |
+
},
|
46183 |
+
{
|
46184 |
+
"epoch": 0.9950226244343892,
|
46185 |
+
"grad_norm": 1.6063568592071533,
|
46186 |
+
"learning_rate": 6.301328908597803e-09,
|
46187 |
+
"loss": 0.7339,
|
46188 |
+
"step": 6597
|
46189 |
+
},
|
46190 |
+
{
|
46191 |
+
"epoch": 0.9951734539969834,
|
46192 |
+
"grad_norm": 1.5589513778686523,
|
46193 |
+
"learning_rate": 5.925223959818427e-09,
|
46194 |
+
"loss": 0.5174,
|
46195 |
+
"step": 6598
|
46196 |
+
},
|
46197 |
+
{
|
46198 |
+
"epoch": 0.9953242835595777,
|
46199 |
+
"grad_norm": 1.47749924659729,
|
46200 |
+
"learning_rate": 5.560690570988181e-09,
|
46201 |
+
"loss": 0.6071,
|
46202 |
+
"step": 6599
|
46203 |
+
},
|
46204 |
+
{
|
46205 |
+
"epoch": 0.995475113122172,
|
46206 |
+
"grad_norm": 2.1695616245269775,
|
46207 |
+
"learning_rate": 5.207728826495118e-09,
|
46208 |
+
"loss": 1.0439,
|
46209 |
+
"step": 6600
|
46210 |
+
},
|
46211 |
+
{
|
46212 |
+
"epoch": 0.9956259426847662,
|
46213 |
+
"grad_norm": 1.7432292699813843,
|
46214 |
+
"learning_rate": 4.866338808023896e-09,
|
46215 |
+
"loss": 1.0061,
|
46216 |
+
"step": 6601
|
46217 |
+
},
|
46218 |
+
{
|
46219 |
+
"epoch": 0.9957767722473605,
|
46220 |
+
"grad_norm": 1.63108491897583,
|
46221 |
+
"learning_rate": 4.53652059459464e-09,
|
46222 |
+
"loss": 0.9774,
|
46223 |
+
"step": 6602
|
46224 |
+
},
|
46225 |
+
{
|
46226 |
+
"epoch": 0.9959276018099548,
|
46227 |
+
"grad_norm": 1.8450249433517456,
|
46228 |
+
"learning_rate": 4.218274262551835e-09,
|
46229 |
+
"loss": 1.0761,
|
46230 |
+
"step": 6603
|
46231 |
+
},
|
46232 |
+
{
|
46233 |
+
"epoch": 0.996078431372549,
|
46234 |
+
"grad_norm": 1.7824820280075073,
|
46235 |
+
"learning_rate": 3.91159988555323e-09,
|
46236 |
+
"loss": 1.0144,
|
46237 |
+
"step": 6604
|
46238 |
+
},
|
46239 |
+
{
|
46240 |
+
"epoch": 0.9962292609351433,
|
46241 |
+
"grad_norm": 1.6914958953857422,
|
46242 |
+
"learning_rate": 3.6164975345809316e-09,
|
46243 |
+
"loss": 0.8875,
|
46244 |
+
"step": 6605
|
46245 |
+
},
|
46246 |
+
{
|
46247 |
+
"epoch": 0.9963800904977376,
|
46248 |
+
"grad_norm": 1.807647943496704,
|
46249 |
+
"learning_rate": 3.3329672779414124e-09,
|
46250 |
+
"loss": 0.9614,
|
46251 |
+
"step": 6606
|
46252 |
+
},
|
46253 |
+
{
|
46254 |
+
"epoch": 0.9965309200603318,
|
46255 |
+
"grad_norm": 1.8976490497589111,
|
46256 |
+
"learning_rate": 3.061009181254404e-09,
|
46257 |
+
"loss": 1.1459,
|
46258 |
+
"step": 6607
|
46259 |
+
},
|
46260 |
+
{
|
46261 |
+
"epoch": 0.9966817496229261,
|
46262 |
+
"grad_norm": 1.571880578994751,
|
46263 |
+
"learning_rate": 2.800623307469552e-09,
|
46264 |
+
"loss": 0.7526,
|
46265 |
+
"step": 6608
|
46266 |
+
},
|
46267 |
+
{
|
46268 |
+
"epoch": 0.9968325791855204,
|
46269 |
+
"grad_norm": 2.0932419300079346,
|
46270 |
+
"learning_rate": 2.5518097168608646e-09,
|
46271 |
+
"loss": 1.3696,
|
46272 |
+
"step": 6609
|
46273 |
+
},
|
46274 |
+
{
|
46275 |
+
"epoch": 0.9969834087481146,
|
46276 |
+
"grad_norm": 1.9494574069976807,
|
46277 |
+
"learning_rate": 2.3145684670100587e-09,
|
46278 |
+
"loss": 1.2108,
|
46279 |
+
"step": 6610
|
46280 |
+
},
|
46281 |
+
{
|
46282 |
+
"epoch": 0.9971342383107089,
|
46283 |
+
"grad_norm": 1.837791919708252,
|
46284 |
+
"learning_rate": 2.0888996128343164e-09,
|
46285 |
+
"loss": 1.0885,
|
46286 |
+
"step": 6611
|
46287 |
+
},
|
46288 |
+
{
|
46289 |
+
"epoch": 0.9972850678733032,
|
46290 |
+
"grad_norm": 1.9261425733566284,
|
46291 |
+
"learning_rate": 1.8748032065640797e-09,
|
46292 |
+
"loss": 1.0092,
|
46293 |
+
"step": 6612
|
46294 |
+
},
|
46295 |
+
{
|
46296 |
+
"epoch": 0.9974358974358974,
|
46297 |
+
"grad_norm": 1.8976205587387085,
|
46298 |
+
"learning_rate": 1.6722792977541535e-09,
|
46299 |
+
"loss": 0.9953,
|
46300 |
+
"step": 6613
|
46301 |
+
},
|
46302 |
+
{
|
46303 |
+
"epoch": 0.9975867269984917,
|
46304 |
+
"grad_norm": 2.369472026824951,
|
46305 |
+
"learning_rate": 1.4813279332781538e-09,
|
46306 |
+
"loss": 1.2183,
|
46307 |
+
"step": 6614
|
46308 |
+
},
|
46309 |
+
{
|
46310 |
+
"epoch": 0.997737556561086,
|
46311 |
+
"grad_norm": 2.169970750808716,
|
46312 |
+
"learning_rate": 1.3019491573396104e-09,
|
46313 |
+
"loss": 1.3011,
|
46314 |
+
"step": 6615
|
46315 |
+
},
|
46316 |
+
{
|
46317 |
+
"epoch": 0.9978883861236802,
|
46318 |
+
"grad_norm": 1.7789149284362793,
|
46319 |
+
"learning_rate": 1.1341430114553132e-09,
|
46320 |
+
"loss": 0.8241,
|
46321 |
+
"step": 6616
|
46322 |
+
},
|
46323 |
+
{
|
46324 |
+
"epoch": 0.9980392156862745,
|
46325 |
+
"grad_norm": 1.9637186527252197,
|
46326 |
+
"learning_rate": 9.779095344608636e-10,
|
46327 |
+
"loss": 0.9467,
|
46328 |
+
"step": 6617
|
46329 |
+
},
|
46330 |
+
{
|
46331 |
+
"epoch": 0.9981900452488688,
|
46332 |
+
"grad_norm": 1.9668041467666626,
|
46333 |
+
"learning_rate": 8.332487625217767e-10,
|
46334 |
+
"loss": 1.0528,
|
46335 |
+
"step": 6618
|
46336 |
+
},
|
46337 |
+
{
|
46338 |
+
"epoch": 0.998340874811463,
|
46339 |
+
"grad_norm": 1.6691468954086304,
|
46340 |
+
"learning_rate": 7.001607291168278e-10,
|
46341 |
+
"loss": 0.8094,
|
46342 |
+
"step": 6619
|
46343 |
+
},
|
46344 |
+
{
|
46345 |
+
"epoch": 0.9984917043740573,
|
46346 |
+
"grad_norm": 2.0753369331359863,
|
46347 |
+
"learning_rate": 5.786454650602568e-10,
|
46348 |
+
"loss": 1.1893,
|
46349 |
+
"step": 6620
|
46350 |
+
},
|
46351 |
+
{
|
46352 |
+
"epoch": 0.9986425339366516,
|
46353 |
+
"grad_norm": 2.0562314987182617,
|
46354 |
+
"learning_rate": 4.68702998462911e-10,
|
46355 |
+
"loss": 1.2021,
|
46356 |
+
"step": 6621
|
46357 |
+
},
|
46358 |
+
{
|
46359 |
+
"epoch": 0.9987933634992459,
|
46360 |
+
"grad_norm": 2.3388001918792725,
|
46361 |
+
"learning_rate": 3.7033335478775523e-10,
|
46362 |
+
"loss": 1.2424,
|
46363 |
+
"step": 6622
|
46364 |
+
},
|
46365 |
+
{
|
46366 |
+
"epoch": 0.9989441930618401,
|
46367 |
+
"grad_norm": 1.9954073429107666,
|
46368 |
+
"learning_rate": 2.8353655679436154e-10,
|
46369 |
+
"loss": 0.9312,
|
46370 |
+
"step": 6623
|
46371 |
+
},
|
46372 |
+
{
|
46373 |
+
"epoch": 0.9990950226244344,
|
46374 |
+
"grad_norm": 2.308021068572998,
|
46375 |
+
"learning_rate": 2.0831262457221557e-10,
|
46376 |
+
"loss": 1.4345,
|
46377 |
+
"step": 6624
|
46378 |
+
},
|
46379 |
+
{
|
46380 |
+
"epoch": 0.9992458521870287,
|
46381 |
+
"grad_norm": 1.8140825033187866,
|
46382 |
+
"learning_rate": 1.4466157553516547e-10,
|
46383 |
+
"loss": 0.8634,
|
46384 |
+
"step": 6625
|
46385 |
+
},
|
46386 |
+
{
|
46387 |
+
"epoch": 0.9993966817496229,
|
46388 |
+
"grad_norm": 1.8196262121200562,
|
46389 |
+
"learning_rate": 9.258342441587076e-11,
|
46390 |
+
"loss": 0.8215,
|
46391 |
+
"step": 6626
|
46392 |
+
},
|
46393 |
+
{
|
46394 |
+
"epoch": 0.9995475113122172,
|
46395 |
+
"grad_norm": 1.757359504699707,
|
46396 |
+
"learning_rate": 5.207818326580238e-11,
|
46397 |
+
"loss": 0.8753,
|
46398 |
+
"step": 6627
|
46399 |
+
},
|
46400 |
+
{
|
46401 |
+
"epoch": 0.9996983408748115,
|
46402 |
+
"grad_norm": 2.1403098106384277,
|
46403 |
+
"learning_rate": 2.3145861460793782e-11,
|
46404 |
+
"loss": 1.122,
|
46405 |
+
"step": 6628
|
46406 |
+
},
|
46407 |
+
{
|
46408 |
+
"epoch": 0.9998491704374057,
|
46409 |
+
"grad_norm": 1.536190390586853,
|
46410 |
+
"learning_rate": 5.78646570104091e-12,
|
46411 |
+
"loss": 0.7415,
|
46412 |
+
"step": 6629
|
46413 |
+
},
|
46414 |
+
{
|
46415 |
+
"epoch": 1.0,
|
46416 |
+
"grad_norm": 2.804316520690918,
|
46417 |
+
"learning_rate": 0.0,
|
46418 |
+
"loss": 1.0438,
|
46419 |
+
"step": 6630
|
46420 |
}
|
46421 |
],
|
46422 |
"logging_steps": 1,
|
|
|
46431 |
"should_evaluate": false,
|
46432 |
"should_log": false,
|
46433 |
"should_save": true,
|
46434 |
+
"should_training_stop": true
|
46435 |
},
|
46436 |
"attributes": {}
|
46437 |
}
|
46438 |
},
|
46439 |
+
"total_flos": 7.369274315715379e+18,
|
46440 |
"train_batch_size": 4,
|
46441 |
"trial_name": null,
|
46442 |
"trial_params": null
|