ZeroUniqueness
committed on
Commit
·
8d2b789
1
Parent(s):
299ea3d
Training in progress, step 4900
Browse files
- adapter_model.bin +1 -1
- {checkpoint-4500 → checkpoint-4800/adapter_model}/README.md +0 -0
- {checkpoint-4500 → checkpoint-4800/adapter_model}/adapter_config.json +4 -4
- {checkpoint-4500 → checkpoint-4800/adapter_model}/adapter_model.bin +1 -1
- {checkpoint-4500/adapter_model → checkpoint-4900}/README.md +0 -0
- {checkpoint-4500/adapter_model → checkpoint-4900}/adapter_config.json +4 -4
- {checkpoint-4500/adapter_model → checkpoint-4900}/adapter_model.bin +1 -1
- {checkpoint-4500 → checkpoint-4900}/optimizer.pt +1 -1
- {checkpoint-4500 → checkpoint-4900}/rng_state_0.pth +1 -1
- {checkpoint-4500 → checkpoint-4900}/rng_state_1.pth +1 -1
- {checkpoint-4500 → checkpoint-4900}/rng_state_10.pth +1 -1
- {checkpoint-4500 → checkpoint-4900}/rng_state_11.pth +1 -1
- {checkpoint-4500 → checkpoint-4900}/rng_state_12.pth +1 -1
- {checkpoint-4500 → checkpoint-4900}/rng_state_13.pth +1 -1
- {checkpoint-4500 → checkpoint-4900}/rng_state_2.pth +1 -1
- {checkpoint-4500 → checkpoint-4900}/rng_state_3.pth +1 -1
- {checkpoint-4500 → checkpoint-4900}/rng_state_4.pth +1 -1
- {checkpoint-4500 → checkpoint-4900}/rng_state_5.pth +1 -1
- {checkpoint-4500 → checkpoint-4900}/rng_state_6.pth +1 -1
- {checkpoint-4500 → checkpoint-4900}/rng_state_7.pth +1 -1
- {checkpoint-4500 → checkpoint-4900}/rng_state_8.pth +1 -1
- {checkpoint-4500 → checkpoint-4900}/rng_state_9.pth +1 -1
- {checkpoint-4500 → checkpoint-4900}/scheduler.pt +1 -1
- {checkpoint-4500 → checkpoint-4900}/trainer_state.json +99 -3
- {checkpoint-4500 → checkpoint-4900}/training_args.bin +1 -1
adapter_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 500897101
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2a3bb33a0c4878b032aec2026e3879865fa62989d9939826a716eb3580fa34cd
|
3 |
size 500897101
|
{checkpoint-4500 → checkpoint-4800/adapter_model}/README.md
RENAMED
File without changes
|
{checkpoint-4500 → checkpoint-4800/adapter_model}/adapter_config.json
RENAMED
@@ -14,13 +14,13 @@
|
|
14 |
"r": 32,
|
15 |
"revision": null,
|
16 |
"target_modules": [
|
|
|
|
|
|
|
17 |
"gate_proj",
|
18 |
"v_proj",
|
19 |
-
"k_proj",
|
20 |
"q_proj",
|
21 |
-
"up_proj"
|
22 |
-
"o_proj",
|
23 |
-
"down_proj"
|
24 |
],
|
25 |
"task_type": "CAUSAL_LM"
|
26 |
}
|
|
|
14 |
"r": 32,
|
15 |
"revision": null,
|
16 |
"target_modules": [
|
17 |
+
"k_proj",
|
18 |
+
"o_proj",
|
19 |
+
"down_proj",
|
20 |
"gate_proj",
|
21 |
"v_proj",
|
|
|
22 |
"q_proj",
|
23 |
+
"up_proj"
|
|
|
|
|
24 |
],
|
25 |
"task_type": "CAUSAL_LM"
|
26 |
}
|
{checkpoint-4500 → checkpoint-4800/adapter_model}/adapter_model.bin
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 500897101
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ff09dc21b35d5903035d753d146589f20bd9535c0d4b5f1426eb089c6ad99842
|
3 |
size 500897101
|
{checkpoint-4500/adapter_model → checkpoint-4900}/README.md
RENAMED
File without changes
|
{checkpoint-4500/adapter_model → checkpoint-4900}/adapter_config.json
RENAMED
@@ -14,13 +14,13 @@
|
|
14 |
"r": 32,
|
15 |
"revision": null,
|
16 |
"target_modules": [
|
|
|
|
|
|
|
17 |
"gate_proj",
|
18 |
"v_proj",
|
19 |
-
"k_proj",
|
20 |
"q_proj",
|
21 |
-
"up_proj"
|
22 |
-
"o_proj",
|
23 |
-
"down_proj"
|
24 |
],
|
25 |
"task_type": "CAUSAL_LM"
|
26 |
}
|
|
|
14 |
"r": 32,
|
15 |
"revision": null,
|
16 |
"target_modules": [
|
17 |
+
"k_proj",
|
18 |
+
"o_proj",
|
19 |
+
"down_proj",
|
20 |
"gate_proj",
|
21 |
"v_proj",
|
|
|
22 |
"q_proj",
|
23 |
+
"up_proj"
|
|
|
|
|
24 |
],
|
25 |
"task_type": "CAUSAL_LM"
|
26 |
}
|
{checkpoint-4500/adapter_model → checkpoint-4900}/adapter_model.bin
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 500897101
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2a3bb33a0c4878b032aec2026e3879865fa62989d9939826a716eb3580fa34cd
|
3 |
size 500897101
|
{checkpoint-4500 → checkpoint-4900}/optimizer.pt
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1001752701
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:89baee7be101e0977ca1b6a39db7917193704b00b2bc0dc4bbf1b2e60e7e6a22
|
3 |
size 1001752701
|
{checkpoint-4500 → checkpoint-4900}/rng_state_0.pth
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 27772
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3d2fb5eb561d4b92c89062c2939861ac27d199067133d2975e1e2aecd6254425
|
3 |
size 27772
|
{checkpoint-4500 → checkpoint-4900}/rng_state_1.pth
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 27772
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:845f7f7308d4eb8331386ad93aebcacf5679b5a7f2b26d79e0e881c33768d801
|
3 |
size 27772
|
{checkpoint-4500 → checkpoint-4900}/rng_state_10.pth
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 27789
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ac19165b3d0d8492404b2597a692a25174dc25bbebee0818cc70fb91e7fa66a9
|
3 |
size 27789
|
{checkpoint-4500 → checkpoint-4900}/rng_state_11.pth
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 27789
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8ba175f65490c813c43be1b8e90c05273b365a5fc35034a39d3e40171b6649d8
|
3 |
size 27789
|
{checkpoint-4500 → checkpoint-4900}/rng_state_12.pth
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 27789
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ff1e6d6959816301feff7a9806d0ca028fe2a211e238b9dca8fd390289758baa
|
3 |
size 27789
|
{checkpoint-4500 → checkpoint-4900}/rng_state_13.pth
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 27789
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f1211e56e1d3e784cd9f700dcd2e4b8d94627474333f9ea95573cbff916f6590
|
3 |
size 27789
|
{checkpoint-4500 → checkpoint-4900}/rng_state_2.pth
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 27772
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1a4a16265fbd33a987004f285a9ac8e9d11fa74e2f1e1ebfae43b07d355feb1a
|
3 |
size 27772
|
{checkpoint-4500 → checkpoint-4900}/rng_state_3.pth
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 27772
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:25be8bdc125a49f8faa5491285fe8c51cbd061cd60a12c31e1d294969eaeed1b
|
3 |
size 27772
|
{checkpoint-4500 → checkpoint-4900}/rng_state_4.pth
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 27772
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f8edc43261484ceb88153a0cf622a2f1d79ae2bd2a22d2cfc258fbc4d67c9fcf
|
3 |
size 27772
|
{checkpoint-4500 → checkpoint-4900}/rng_state_5.pth
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 27772
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f3c01adf24a8fb5ced49e768a00799034bf23fdbe170faf373d7bffc71cf330e
|
3 |
size 27772
|
{checkpoint-4500 → checkpoint-4900}/rng_state_6.pth
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 27772
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1431fdce90c9dffa41af7e45225d7e937f8a892f36234dd2c9c0c5ea825d344b
|
3 |
size 27772
|
{checkpoint-4500 → checkpoint-4900}/rng_state_7.pth
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 27772
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:698169f9adbf09c9e392876211e57228599d852964332781ccccb91b02c99668
|
3 |
size 27772
|
{checkpoint-4500 → checkpoint-4900}/rng_state_8.pth
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 27772
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1e12b62affc946cb152ae7675f1074ed4a8c28a930020230d21a02278a8cfc66
|
3 |
size 27772
|
{checkpoint-4500 → checkpoint-4900}/rng_state_9.pth
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 27772
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8d0857929e84e3ee4f8ead280fd3f99826ceb6e6e749cce250845a12d23d0cc7
|
3 |
size 27772
|
{checkpoint-4500 → checkpoint-4900}/scheduler.pt
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 627
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fce2b4a13969f53fdf68394680e1b829aa39975e2753c73d5d093518e2315b1c
|
3 |
size 627
|
{checkpoint-4500 → checkpoint-4900}/trainer_state.json
RENAMED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 1.
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -1094,11 +1094,107 @@
|
|
1094 |
"learning_rate": 7.480272555710227e-05,
|
1095 |
"loss": 0.8006,
|
1096 |
"step": 4500
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1097 |
}
|
1098 |
],
|
1099 |
"max_steps": 7737,
|
1100 |
"num_train_epochs": 3,
|
1101 |
-
"total_flos":
|
1102 |
"trial_name": null,
|
1103 |
"trial_params": null
|
1104 |
}
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 1.8999612252811167,
|
5 |
+
"global_step": 4900,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
1094 |
"learning_rate": 7.480272555710227e-05,
|
1095 |
"loss": 0.8006,
|
1096 |
"step": 4500
|
1097 |
+
},
|
1098 |
+
{
|
1099 |
+
"epoch": 1.75,
|
1100 |
+
"learning_rate": 7.382040650400185e-05,
|
1101 |
+
"loss": 0.7937,
|
1102 |
+
"step": 4525
|
1103 |
+
},
|
1104 |
+
{
|
1105 |
+
"epoch": 1.76,
|
1106 |
+
"learning_rate": 7.28407921384267e-05,
|
1107 |
+
"loss": 0.794,
|
1108 |
+
"step": 4550
|
1109 |
+
},
|
1110 |
+
{
|
1111 |
+
"epoch": 1.77,
|
1112 |
+
"learning_rate": 7.186398366709545e-05,
|
1113 |
+
"loss": 0.7931,
|
1114 |
+
"step": 4575
|
1115 |
+
},
|
1116 |
+
{
|
1117 |
+
"epoch": 1.78,
|
1118 |
+
"learning_rate": 7.089008200684197e-05,
|
1119 |
+
"loss": 0.7982,
|
1120 |
+
"step": 4600
|
1121 |
+
},
|
1122 |
+
{
|
1123 |
+
"epoch": 1.79,
|
1124 |
+
"learning_rate": 6.991918777418928e-05,
|
1125 |
+
"loss": 0.7916,
|
1126 |
+
"step": 4625
|
1127 |
+
},
|
1128 |
+
{
|
1129 |
+
"epoch": 1.8,
|
1130 |
+
"learning_rate": 6.895140127495455e-05,
|
1131 |
+
"loss": 0.7919,
|
1132 |
+
"step": 4650
|
1133 |
+
},
|
1134 |
+
{
|
1135 |
+
"epoch": 1.81,
|
1136 |
+
"learning_rate": 6.798682249388631e-05,
|
1137 |
+
"loss": 0.7863,
|
1138 |
+
"step": 4675
|
1139 |
+
},
|
1140 |
+
{
|
1141 |
+
"epoch": 1.82,
|
1142 |
+
"learning_rate": 6.702555108433461e-05,
|
1143 |
+
"loss": 0.789,
|
1144 |
+
"step": 4700
|
1145 |
+
},
|
1146 |
+
{
|
1147 |
+
"epoch": 1.83,
|
1148 |
+
"learning_rate": 6.606768635795574e-05,
|
1149 |
+
"loss": 0.7902,
|
1150 |
+
"step": 4725
|
1151 |
+
},
|
1152 |
+
{
|
1153 |
+
"epoch": 1.84,
|
1154 |
+
"learning_rate": 6.511332727445191e-05,
|
1155 |
+
"loss": 0.7924,
|
1156 |
+
"step": 4750
|
1157 |
+
},
|
1158 |
+
{
|
1159 |
+
"epoch": 1.85,
|
1160 |
+
"learning_rate": 6.416257243134747e-05,
|
1161 |
+
"loss": 0.7957,
|
1162 |
+
"step": 4775
|
1163 |
+
},
|
1164 |
+
{
|
1165 |
+
"epoch": 1.86,
|
1166 |
+
"learning_rate": 6.321552005380256e-05,
|
1167 |
+
"loss": 0.7916,
|
1168 |
+
"step": 4800
|
1169 |
+
},
|
1170 |
+
{
|
1171 |
+
"epoch": 1.87,
|
1172 |
+
"learning_rate": 6.22722679844652e-05,
|
1173 |
+
"loss": 0.7867,
|
1174 |
+
"step": 4825
|
1175 |
+
},
|
1176 |
+
{
|
1177 |
+
"epoch": 1.88,
|
1178 |
+
"learning_rate": 6.133291367336284e-05,
|
1179 |
+
"loss": 0.7944,
|
1180 |
+
"step": 4850
|
1181 |
+
},
|
1182 |
+
{
|
1183 |
+
"epoch": 1.89,
|
1184 |
+
"learning_rate": 6.039755416783457e-05,
|
1185 |
+
"loss": 0.7982,
|
1186 |
+
"step": 4875
|
1187 |
+
},
|
1188 |
+
{
|
1189 |
+
"epoch": 1.9,
|
1190 |
+
"learning_rate": 5.946628610250484e-05,
|
1191 |
+
"loss": 0.7918,
|
1192 |
+
"step": 4900
|
1193 |
}
|
1194 |
],
|
1195 |
"max_steps": 7737,
|
1196 |
"num_train_epochs": 3,
|
1197 |
+
"total_flos": 2.1101658419601867e+19,
|
1198 |
"trial_name": null,
|
1199 |
"trial_params": null
|
1200 |
}
|
{checkpoint-4500 → checkpoint-4900}/training_args.bin
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4027
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:354c4769acaf21317d8ab58ba2dd133763f3bee1aec7488fb334e45f3e80ebc3
|
3 |
size 4027
|