Training in progress, step 279, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 639691872
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c0cf25f76e8662756856fd1a097be6ff3870d9f310686bf41f15fe00f6e14dc4
|
3 |
size 639691872
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:742095389fc358571b77e198008ebd180a04e2384f21dd14cc648803058e1cfe
|
3 |
+
size 325340244
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:28e1effb06033458f08c521267ddbc73b4a5a3e148e528b2cfd2ce1d0d17a805
|
3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:de2e7670b3561000eee216684d0727bea9800d1c3f3b2422105732155595c43d
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 2.
|
5 |
"eval_steps": 31,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -653,6 +653,91 @@
|
|
653 |
"eval_samples_per_second": 15.489,
|
654 |
"eval_steps_per_second": 1.936,
|
655 |
"step": 248
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
656 |
}
|
657 |
],
|
658 |
"logging_steps": 3,
|
@@ -672,7 +757,7 @@
|
|
672 |
"attributes": {}
|
673 |
}
|
674 |
},
|
675 |
-
"total_flos": 3.
|
676 |
"train_batch_size": 8,
|
677 |
"trial_name": null,
|
678 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 2.26369168356998,
|
5 |
"eval_steps": 31,
|
6 |
+
"global_step": 279,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
653 |
"eval_samples_per_second": 15.489,
|
654 |
"eval_steps_per_second": 1.936,
|
655 |
"step": 248
|
656 |
+
},
|
657 |
+
{
|
658 |
+
"epoch": 2.020283975659229,
|
659 |
+
"grad_norm": 3.395972967147827,
|
660 |
+
"learning_rate": 2.537882199482665e-05,
|
661 |
+
"loss": 1.1378,
|
662 |
+
"step": 249
|
663 |
+
},
|
664 |
+
{
|
665 |
+
"epoch": 2.0446247464503045,
|
666 |
+
"grad_norm": 4.442982196807861,
|
667 |
+
"learning_rate": 2.4248096254497288e-05,
|
668 |
+
"loss": 1.253,
|
669 |
+
"step": 252
|
670 |
+
},
|
671 |
+
{
|
672 |
+
"epoch": 2.0689655172413794,
|
673 |
+
"grad_norm": 5.2550764083862305,
|
674 |
+
"learning_rate": 2.3135019582658802e-05,
|
675 |
+
"loss": 1.0433,
|
676 |
+
"step": 255
|
677 |
+
},
|
678 |
+
{
|
679 |
+
"epoch": 2.0933062880324544,
|
680 |
+
"grad_norm": 4.615274429321289,
|
681 |
+
"learning_rate": 2.2040354826462668e-05,
|
682 |
+
"loss": 1.1078,
|
683 |
+
"step": 258
|
684 |
+
},
|
685 |
+
{
|
686 |
+
"epoch": 2.1176470588235294,
|
687 |
+
"grad_norm": 5.723622798919678,
|
688 |
+
"learning_rate": 2.0964852214453013e-05,
|
689 |
+
"loss": 0.9585,
|
690 |
+
"step": 261
|
691 |
+
},
|
692 |
+
{
|
693 |
+
"epoch": 2.1419878296146044,
|
694 |
+
"grad_norm": 4.3719587326049805,
|
695 |
+
"learning_rate": 1.9909248842397584e-05,
|
696 |
+
"loss": 0.9587,
|
697 |
+
"step": 264
|
698 |
+
},
|
699 |
+
{
|
700 |
+
"epoch": 2.1663286004056794,
|
701 |
+
"grad_norm": 6.265243053436279,
|
702 |
+
"learning_rate": 1.887426816811903e-05,
|
703 |
+
"loss": 0.9681,
|
704 |
+
"step": 267
|
705 |
+
},
|
706 |
+
{
|
707 |
+
"epoch": 2.1906693711967544,
|
708 |
+
"grad_norm": 5.796363830566406,
|
709 |
+
"learning_rate": 1.7860619515673033e-05,
|
710 |
+
"loss": 1.0059,
|
711 |
+
"step": 270
|
712 |
+
},
|
713 |
+
{
|
714 |
+
"epoch": 2.2150101419878294,
|
715 |
+
"grad_norm": 5.817225456237793,
|
716 |
+
"learning_rate": 1.6868997589213136e-05,
|
717 |
+
"loss": 1.0253,
|
718 |
+
"step": 273
|
719 |
+
},
|
720 |
+
{
|
721 |
+
"epoch": 2.239350912778905,
|
722 |
+
"grad_norm": 4.450856685638428,
|
723 |
+
"learning_rate": 1.5900081996875083e-05,
|
724 |
+
"loss": 0.7533,
|
725 |
+
"step": 276
|
726 |
+
},
|
727 |
+
{
|
728 |
+
"epoch": 2.26369168356998,
|
729 |
+
"grad_norm": 7.340899467468262,
|
730 |
+
"learning_rate": 1.4954536785007456e-05,
|
731 |
+
"loss": 0.9246,
|
732 |
+
"step": 279
|
733 |
+
},
|
734 |
+
{
|
735 |
+
"epoch": 2.26369168356998,
|
736 |
+
"eval_loss": 0.5402039885520935,
|
737 |
+
"eval_runtime": 13.4355,
|
738 |
+
"eval_samples_per_second": 15.481,
|
739 |
+
"eval_steps_per_second": 1.935,
|
740 |
+
"step": 279
|
741 |
}
|
742 |
],
|
743 |
"logging_steps": 3,
|
|
|
757 |
"attributes": {}
|
758 |
}
|
759 |
},
|
760 |
+
"total_flos": 3.706262291176489e+17,
|
761 |
"train_batch_size": 8,
|
762 |
"trial_name": null,
|
763 |
"trial_params": null
|