Joshua Lochner
committed on
Commit
·
f42ed7c
1
Parent(s):
db94bb7
Next training iteration (1.28m)
Browse files
- added_tokens.json +1 -1
- pytorch_model.bin +1 -1
- rng_state.pth +1 -1
- scheduler.pt +1 -1
- trainer_state.json +63 -3
- training_args.bin +1 -1
added_tokens.json
CHANGED
@@ -1 +1 @@
|
|
1 |
-
{"
|
|
|
1 |
+
{"NO_SEGMENT_TOKEN": 32112, "BETWEEN_SEGMENTS_TOKEN": 32111, "NUMBER_TOKEN": 32103, "START_SPONSOR_TOKEN": 32113, "START_SELFPROMO_TOKEN": 32115, "END_SPONSOR_TOKEN": 32114, "END_INTERACTION_TOKEN": 32118, "[Applause]": 32107, "HYPHENATED_URL_TOKEN": 32101, "EXTRACT_SEGMENTS: ": 32110, "SHORT_HYPHENATED_TOKEN": 32104, "END_SELFPROMO_TOKEN": 32116, "NUMBER_PERCENTAGE_TOKEN": 32102, "PROFANITY_TOKEN": 32109, "[Music]": 32106, "[Laughter]": 32108, "LONG_WORD_TOKEN": 32105, "URL_TOKEN": 32100, "START_INTERACTION_TOKEN": 32117}
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 307892869
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2d8a0af26f7ff16bae5ef0583063c69ee47700be6a4793c994a3ef6e83d117b8
|
3 |
size 307892869
|
rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d8191c03018e0f166e6e407d71defd3cbfe5dcae86d404ff6a5903f7b765d8ac
|
3 |
size 14503
|
scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:257198b9da6abece4de57c5b798cb130a628ec21c36fad1093e79d27e917992e
|
3 |
size 623
|
trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 3.
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -1046,11 +1046,71 @@
|
|
1046 |
"eval_samples_per_second": 87.844,
|
1047 |
"eval_steps_per_second": 21.962,
|
1048 |
"step": 1040000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1049 |
}
|
1050 |
],
|
1051 |
"max_steps": 2791490,
|
1052 |
"num_train_epochs": 10,
|
1053 |
-
"total_flos":
|
1054 |
"trial_name": null,
|
1055 |
"trial_params": null
|
1056 |
}
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 3.9405478794478936,
|
5 |
+
"global_step": 1100000,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
1046 |
"eval_samples_per_second": 87.844,
|
1047 |
"eval_steps_per_second": 21.962,
|
1048 |
"step": 1040000
|
1049 |
+
},
|
1050 |
+
{
|
1051 |
+
"epoch": 3.76,
|
1052 |
+
"learning_rate": 3.119283966627142e-05,
|
1053 |
+
"loss": 0.0534,
|
1054 |
+
"step": 1050000
|
1055 |
+
},
|
1056 |
+
{
|
1057 |
+
"epoch": 3.8,
|
1058 |
+
"learning_rate": 3.1013723853569243e-05,
|
1059 |
+
"loss": 0.0552,
|
1060 |
+
"step": 1060000
|
1061 |
+
},
|
1062 |
+
{
|
1063 |
+
"epoch": 3.8,
|
1064 |
+
"eval_loss": 0.054923560470342636,
|
1065 |
+
"eval_runtime": 716.4597,
|
1066 |
+
"eval_samples_per_second": 86.583,
|
1067 |
+
"eval_steps_per_second": 21.647,
|
1068 |
+
"step": 1060000
|
1069 |
+
},
|
1070 |
+
{
|
1071 |
+
"epoch": 3.83,
|
1072 |
+
"learning_rate": 3.083460804086706e-05,
|
1073 |
+
"loss": 0.055,
|
1074 |
+
"step": 1070000
|
1075 |
+
},
|
1076 |
+
{
|
1077 |
+
"epoch": 3.87,
|
1078 |
+
"learning_rate": 3.065549222816489e-05,
|
1079 |
+
"loss": 0.0547,
|
1080 |
+
"step": 1080000
|
1081 |
+
},
|
1082 |
+
{
|
1083 |
+
"epoch": 3.87,
|
1084 |
+
"eval_loss": 0.05532824248075485,
|
1085 |
+
"eval_runtime": 713.1552,
|
1086 |
+
"eval_samples_per_second": 86.984,
|
1087 |
+
"eval_steps_per_second": 21.747,
|
1088 |
+
"step": 1080000
|
1089 |
+
},
|
1090 |
+
{
|
1091 |
+
"epoch": 3.9,
|
1092 |
+
"learning_rate": 3.047637641546271e-05,
|
1093 |
+
"loss": 0.0526,
|
1094 |
+
"step": 1090000
|
1095 |
+
},
|
1096 |
+
{
|
1097 |
+
"epoch": 3.94,
|
1098 |
+
"learning_rate": 3.0297260602760537e-05,
|
1099 |
+
"loss": 0.0536,
|
1100 |
+
"step": 1100000
|
1101 |
+
},
|
1102 |
+
{
|
1103 |
+
"epoch": 3.94,
|
1104 |
+
"eval_loss": 0.05594659596681595,
|
1105 |
+
"eval_runtime": 765.7749,
|
1106 |
+
"eval_samples_per_second": 81.007,
|
1107 |
+
"eval_steps_per_second": 20.253,
|
1108 |
+
"step": 1100000
|
1109 |
}
|
1110 |
],
|
1111 |
"max_steps": 2791490,
|
1112 |
"num_train_epochs": 10,
|
1113 |
+
"total_flos": 7.350678561491497e+17,
|
1114 |
"trial_name": null,
|
1115 |
"trial_params": null
|
1116 |
}
|
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 3119
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:01e951f41d1c838fb03d74c0997ade49e243ec0d07cb18ff2559ff234bae126c
|
3 |
size 3119
|