Training in progress, step 10256, checkpoint
Browse files
last-checkpoint/model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 903834408
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:be8f064d453a57ea7273970350761d964c9807f1c078006d860d03a3b99aecd4
|
3 |
size 903834408
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1807824186
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:12874bf81eb0524d19891bca2ddcc85a7634979c700dd291ce0217fc58255786
|
3 |
size 1807824186
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ed797abbb6e575252328647d2519975ca5213c166e33893bf2b6cbc2d2b0579d
|
3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:075aea1ae34d22fd6b751b1b0ff5783a336ded95b0d65058f0b4391e6f395a77
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 16,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -77447,6 +77447,127 @@
|
|
77447 |
"eval_samples_per_second": 11.726,
|
77448 |
"eval_steps_per_second": 1.466,
|
77449 |
"step": 10240
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
77450 |
}
|
77451 |
],
|
77452 |
"logging_steps": 1,
|
@@ -77466,7 +77587,7 @@
|
|
77466 |
"attributes": {}
|
77467 |
}
|
77468 |
},
|
77469 |
-
"total_flos": 1.
|
77470 |
"train_batch_size": 8,
|
77471 |
"trial_name": null,
|
77472 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.6630141414141414,
|
5 |
"eval_steps": 16,
|
6 |
+
"global_step": 10256,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
77447 |
"eval_samples_per_second": 11.726,
|
77448 |
"eval_steps_per_second": 1.466,
|
77449 |
"step": 10240
|
77450 |
+
},
|
77451 |
+
{
|
77452 |
+
"epoch": 0.6620444444444444,
|
77453 |
+
"grad_norm": 0.049845460802316666,
|
77454 |
+
"learning_rate": 0.00017849373338296403,
|
77455 |
+
"loss": 0.0913,
|
77456 |
+
"step": 10241
|
77457 |
+
},
|
77458 |
+
{
|
77459 |
+
"epoch": 0.6621090909090909,
|
77460 |
+
"grad_norm": 0.0473443903028965,
|
77461 |
+
"learning_rate": 0.00017848949616165787,
|
77462 |
+
"loss": 0.0742,
|
77463 |
+
"step": 10242
|
77464 |
+
},
|
77465 |
+
{
|
77466 |
+
"epoch": 0.6621737373737374,
|
77467 |
+
"grad_norm": 0.05419163033366203,
|
77468 |
+
"learning_rate": 0.0001784852585732823,
|
77469 |
+
"loss": 0.0863,
|
77470 |
+
"step": 10243
|
77471 |
+
},
|
77472 |
+
{
|
77473 |
+
"epoch": 0.6622383838383838,
|
77474 |
+
"grad_norm": 0.06253422796726227,
|
77475 |
+
"learning_rate": 0.00017848102061785709,
|
77476 |
+
"loss": 0.1065,
|
77477 |
+
"step": 10244
|
77478 |
+
},
|
77479 |
+
{
|
77480 |
+
"epoch": 0.6623030303030303,
|
77481 |
+
"grad_norm": 0.057101961225271225,
|
77482 |
+
"learning_rate": 0.0001784767822954021,
|
77483 |
+
"loss": 0.1016,
|
77484 |
+
"step": 10245
|
77485 |
+
},
|
77486 |
+
{
|
77487 |
+
"epoch": 0.6623676767676767,
|
77488 |
+
"grad_norm": 0.06272678822278976,
|
77489 |
+
"learning_rate": 0.00017847254360593717,
|
77490 |
+
"loss": 0.0794,
|
77491 |
+
"step": 10246
|
77492 |
+
},
|
77493 |
+
{
|
77494 |
+
"epoch": 0.6624323232323233,
|
77495 |
+
"grad_norm": 0.051759131252765656,
|
77496 |
+
"learning_rate": 0.00017846830454948208,
|
77497 |
+
"loss": 0.0738,
|
77498 |
+
"step": 10247
|
77499 |
+
},
|
77500 |
+
{
|
77501 |
+
"epoch": 0.6624969696969697,
|
77502 |
+
"grad_norm": 0.05439075082540512,
|
77503 |
+
"learning_rate": 0.00017846406512605668,
|
77504 |
+
"loss": 0.0944,
|
77505 |
+
"step": 10248
|
77506 |
+
},
|
77507 |
+
{
|
77508 |
+
"epoch": 0.6625616161616161,
|
77509 |
+
"grad_norm": 0.05330037698149681,
|
77510 |
+
"learning_rate": 0.00017845982533568075,
|
77511 |
+
"loss": 0.088,
|
77512 |
+
"step": 10249
|
77513 |
+
},
|
77514 |
+
{
|
77515 |
+
"epoch": 0.6626262626262627,
|
77516 |
+
"grad_norm": 0.04516725614666939,
|
77517 |
+
"learning_rate": 0.0001784555851783742,
|
77518 |
+
"loss": 0.0759,
|
77519 |
+
"step": 10250
|
77520 |
+
},
|
77521 |
+
{
|
77522 |
+
"epoch": 0.6626909090909091,
|
77523 |
+
"grad_norm": 0.054199665784835815,
|
77524 |
+
"learning_rate": 0.0001784513446541568,
|
77525 |
+
"loss": 0.0869,
|
77526 |
+
"step": 10251
|
77527 |
+
},
|
77528 |
+
{
|
77529 |
+
"epoch": 0.6627555555555555,
|
77530 |
+
"grad_norm": 0.05711120367050171,
|
77531 |
+
"learning_rate": 0.0001784471037630484,
|
77532 |
+
"loss": 0.0828,
|
77533 |
+
"step": 10252
|
77534 |
+
},
|
77535 |
+
{
|
77536 |
+
"epoch": 0.662820202020202,
|
77537 |
+
"grad_norm": 0.049388039857149124,
|
77538 |
+
"learning_rate": 0.00017844286250506884,
|
77539 |
+
"loss": 0.0858,
|
77540 |
+
"step": 10253
|
77541 |
+
},
|
77542 |
+
{
|
77543 |
+
"epoch": 0.6628848484848485,
|
77544 |
+
"grad_norm": 0.04961549863219261,
|
77545 |
+
"learning_rate": 0.0001784386208802379,
|
77546 |
+
"loss": 0.0852,
|
77547 |
+
"step": 10254
|
77548 |
+
},
|
77549 |
+
{
|
77550 |
+
"epoch": 0.662949494949495,
|
77551 |
+
"grad_norm": 0.05711861327290535,
|
77552 |
+
"learning_rate": 0.0001784343788885755,
|
77553 |
+
"loss": 0.0849,
|
77554 |
+
"step": 10255
|
77555 |
+
},
|
77556 |
+
{
|
77557 |
+
"epoch": 0.6630141414141414,
|
77558 |
+
"grad_norm": 0.06972243636846542,
|
77559 |
+
"learning_rate": 0.00017843013653010144,
|
77560 |
+
"loss": 0.1039,
|
77561 |
+
"step": 10256
|
77562 |
+
},
|
77563 |
+
{
|
77564 |
+
"epoch": 0.6630141414141414,
|
77565 |
+
"eval_bleu": 20.086468326664342,
|
77566 |
+
"eval_loss": 0.08900181949138641,
|
77567 |
+
"eval_runtime": 2.8036,
|
77568 |
+
"eval_samples_per_second": 11.414,
|
77569 |
+
"eval_steps_per_second": 1.427,
|
77570 |
+
"step": 10256
|
77571 |
}
|
77572 |
],
|
77573 |
"logging_steps": 1,
|
|
|
77587 |
"attributes": {}
|
77588 |
}
|
77589 |
},
|
77590 |
+
"total_flos": 1.9985510801866752e+17,
|
77591 |
"train_batch_size": 8,
|
77592 |
"trial_name": null,
|
77593 |
"trial_params": null
|