ZeroUniqueness committed on
Commit
a22bfcd
·
1 Parent(s): 9a55e3f

Training in progress, step 19000

Browse files
adapter_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:71bf449473329db623c9e20b261816375a55b691f04f413c33c16578f715c541
3
  size 500897101
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bb683e1ef26fb6759ee6f8f26fd71fa321318d9618b1721b67182a9ba22c4bed
3
  size 500897101
{checkpoint-15000 → checkpoint-18000/adapter_model}/README.md RENAMED
File without changes
{checkpoint-15000 → checkpoint-18000/adapter_model}/adapter_config.json RENAMED
@@ -14,13 +14,13 @@
14
  "r": 32,
15
  "revision": null,
16
  "target_modules": [
17
- "v_proj",
18
- "o_proj",
19
- "gate_proj",
20
- "k_proj",
21
  "up_proj",
22
  "down_proj",
23
- "q_proj"
 
 
 
 
24
  ],
25
  "task_type": "CAUSAL_LM"
26
  }
 
14
  "r": 32,
15
  "revision": null,
16
  "target_modules": [
 
 
 
 
17
  "up_proj",
18
  "down_proj",
19
+ "q_proj",
20
+ "v_proj",
21
+ "k_proj",
22
+ "gate_proj",
23
+ "o_proj"
24
  ],
25
  "task_type": "CAUSAL_LM"
26
  }
{checkpoint-15000 → checkpoint-18000/adapter_model}/adapter_model.bin RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:330f8f669df7984ed4c152d168dbe99facfaff1cf8636cd3363f622a972bfac1
3
  size 500897101
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:71bf449473329db623c9e20b261816375a55b691f04f413c33c16578f715c541
3
  size 500897101
{checkpoint-15000/adapter_model → checkpoint-19000}/README.md RENAMED
File without changes
{checkpoint-15000/adapter_model → checkpoint-19000}/adapter_config.json RENAMED
@@ -14,13 +14,13 @@
14
  "r": 32,
15
  "revision": null,
16
  "target_modules": [
17
- "v_proj",
18
- "o_proj",
19
- "gate_proj",
20
- "k_proj",
21
  "up_proj",
22
  "down_proj",
23
- "q_proj"
 
 
 
 
24
  ],
25
  "task_type": "CAUSAL_LM"
26
  }
 
14
  "r": 32,
15
  "revision": null,
16
  "target_modules": [
 
 
 
 
17
  "up_proj",
18
  "down_proj",
19
+ "q_proj",
20
+ "v_proj",
21
+ "k_proj",
22
+ "gate_proj",
23
+ "o_proj"
24
  ],
25
  "task_type": "CAUSAL_LM"
26
  }
{checkpoint-15000/adapter_model → checkpoint-19000}/adapter_model.bin RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:330f8f669df7984ed4c152d168dbe99facfaff1cf8636cd3363f622a972bfac1
3
  size 500897101
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bb683e1ef26fb6759ee6f8f26fd71fa321318d9618b1721b67182a9ba22c4bed
3
  size 500897101
{checkpoint-15000 → checkpoint-19000}/optimizer.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5099a0a1333c5a0a9fc459ec8ff82903c183ac71dd1de365c2d49ad712ab1bbc
3
  size 1001723453
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f9516e9c48e0b063f0894fa36644ed81a9950c2ccb238710e075ac900e1c691a
3
  size 1001723453
{checkpoint-15000 → checkpoint-19000}/rng_state.pth RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f66b2cba13baffc46c28851c1dc90993a5de44ff58a9d9551ad329c8d7120829
3
  size 14575
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fe0caf0c3a09c83e2a6569bdf628a23d4ab8e1894b17df4aac29861e99504483
3
  size 14575
{checkpoint-15000 → checkpoint-19000}/scheduler.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d378580a8a0486a2e62c5917c1ceacee8a8ab8974b4ae086c3741f868751bcaf
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5ebd653f1bd02557ed5a069aef9f43482462db6edb223607dfd50441b1ab368a
3
  size 627
{checkpoint-15000 → checkpoint-19000}/trainer_state.json RENAMED
@@ -1,8 +1,8 @@
1
  {
2
- "best_metric": 0.6601429581642151,
3
- "best_model_checkpoint": "./qlora-out/checkpoint-15000",
4
- "epoch": 0.5592632638604079,
5
- "global_step": 15000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -1026,11 +1026,283 @@
1026
  "eval_samples_per_second": 0.425,
1027
  "eval_steps_per_second": 0.425,
1028
  "step": 15000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1029
  }
1030
  ],
1031
  "max_steps": 80463,
1032
  "num_train_epochs": 3,
1033
- "total_flos": 4.203098770308219e+18,
1034
  "trial_name": null,
1035
  "trial_params": null
1036
  }
 
1
  {
2
+ "best_metric": 0.6378119587898254,
3
+ "best_model_checkpoint": "./qlora-out/checkpoint-19000",
4
+ "epoch": 0.7084001342231834,
5
+ "global_step": 19000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
1026
  "eval_samples_per_second": 0.425,
1027
  "eval_steps_per_second": 0.425,
1028
  "step": 15000
1029
+ },
1030
+ {
1031
+ "epoch": 0.56,
1032
+ "learning_rate": 0.00018313597684210115,
1033
+ "loss": 0.6198,
1034
+ "step": 15100
1035
+ },
1036
+ {
1037
+ "epoch": 0.57,
1038
+ "learning_rate": 0.00018291833590461498,
1039
+ "loss": 0.6345,
1040
+ "step": 15200
1041
+ },
1042
+ {
1043
+ "epoch": 0.57,
1044
+ "learning_rate": 0.00018269943062303257,
1045
+ "loss": 0.6554,
1046
+ "step": 15300
1047
+ },
1048
+ {
1049
+ "epoch": 0.57,
1050
+ "learning_rate": 0.00018247926433523562,
1051
+ "loss": 0.6151,
1052
+ "step": 15400
1053
+ },
1054
+ {
1055
+ "epoch": 0.58,
1056
+ "learning_rate": 0.00018225784039833386,
1057
+ "loss": 0.6331,
1058
+ "step": 15500
1059
+ },
1060
+ {
1061
+ "epoch": 0.58,
1062
+ "learning_rate": 0.0001820351621886136,
1063
+ "loss": 0.6256,
1064
+ "step": 15600
1065
+ },
1066
+ {
1067
+ "epoch": 0.59,
1068
+ "learning_rate": 0.0001818112331014865,
1069
+ "loss": 0.6263,
1070
+ "step": 15700
1071
+ },
1072
+ {
1073
+ "epoch": 0.59,
1074
+ "learning_rate": 0.00018158605655143757,
1075
+ "loss": 0.6015,
1076
+ "step": 15800
1077
+ },
1078
+ {
1079
+ "epoch": 0.59,
1080
+ "learning_rate": 0.00018135963597197327,
1081
+ "loss": 0.6144,
1082
+ "step": 15900
1083
+ },
1084
+ {
1085
+ "epoch": 0.6,
1086
+ "learning_rate": 0.00018113197481556912,
1087
+ "loss": 0.613,
1088
+ "step": 16000
1089
+ },
1090
+ {
1091
+ "epoch": 0.6,
1092
+ "eval_loss": 0.6547831892967224,
1093
+ "eval_runtime": 1305.9645,
1094
+ "eval_samples_per_second": 0.415,
1095
+ "eval_steps_per_second": 0.415,
1096
+ "step": 16000
1097
+ },
1098
+ {
1099
+ "epoch": 0.6,
1100
+ "learning_rate": 0.00018090307655361701,
1101
+ "loss": 0.6354,
1102
+ "step": 16100
1103
+ },
1104
+ {
1105
+ "epoch": 0.6,
1106
+ "learning_rate": 0.00018067294467637228,
1107
+ "loss": 0.6349,
1108
+ "step": 16200
1109
+ },
1110
+ {
1111
+ "epoch": 0.61,
1112
+ "learning_rate": 0.00018044158269290054,
1113
+ "loss": 0.6127,
1114
+ "step": 16300
1115
+ },
1116
+ {
1117
+ "epoch": 0.61,
1118
+ "learning_rate": 0.00018020899413102412,
1119
+ "loss": 0.5977,
1120
+ "step": 16400
1121
+ },
1122
+ {
1123
+ "epoch": 0.62,
1124
+ "learning_rate": 0.00017997518253726834,
1125
+ "loss": 0.6213,
1126
+ "step": 16500
1127
+ },
1128
+ {
1129
+ "epoch": 0.62,
1130
+ "learning_rate": 0.00017974015147680734,
1131
+ "loss": 0.6168,
1132
+ "step": 16600
1133
+ },
1134
+ {
1135
+ "epoch": 0.62,
1136
+ "learning_rate": 0.00017950390453340978,
1137
+ "loss": 0.5978,
1138
+ "step": 16700
1139
+ },
1140
+ {
1141
+ "epoch": 0.63,
1142
+ "learning_rate": 0.0001792664453093842,
1143
+ "loss": 0.6201,
1144
+ "step": 16800
1145
+ },
1146
+ {
1147
+ "epoch": 0.63,
1148
+ "learning_rate": 0.000179027777425524,
1149
+ "loss": 0.6141,
1150
+ "step": 16900
1151
+ },
1152
+ {
1153
+ "epoch": 0.63,
1154
+ "learning_rate": 0.00017878790452105245,
1155
+ "loss": 0.6135,
1156
+ "step": 17000
1157
+ },
1158
+ {
1159
+ "epoch": 0.63,
1160
+ "eval_loss": 0.6480616927146912,
1161
+ "eval_runtime": 1347.9883,
1162
+ "eval_samples_per_second": 0.402,
1163
+ "eval_steps_per_second": 0.402,
1164
+ "step": 17000
1165
+ },
1166
+ {
1167
+ "epoch": 0.64,
1168
+ "learning_rate": 0.0001785468302535669,
1169
+ "loss": 0.6363,
1170
+ "step": 17100
1171
+ },
1172
+ {
1173
+ "epoch": 0.64,
1174
+ "learning_rate": 0.00017830455829898317,
1175
+ "loss": 0.6076,
1176
+ "step": 17200
1177
+ },
1178
+ {
1179
+ "epoch": 0.65,
1180
+ "learning_rate": 0.00017806109235147963,
1181
+ "loss": 0.609,
1182
+ "step": 17300
1183
+ },
1184
+ {
1185
+ "epoch": 0.65,
1186
+ "learning_rate": 0.00017781643612344058,
1187
+ "loss": 0.6044,
1188
+ "step": 17400
1189
+ },
1190
+ {
1191
+ "epoch": 0.65,
1192
+ "learning_rate": 0.00017757059334539994,
1193
+ "loss": 0.6262,
1194
+ "step": 17500
1195
+ },
1196
+ {
1197
+ "epoch": 0.66,
1198
+ "learning_rate": 0.00017732356776598403,
1199
+ "loss": 0.6195,
1200
+ "step": 17600
1201
+ },
1202
+ {
1203
+ "epoch": 0.66,
1204
+ "learning_rate": 0.0001770753631518548,
1205
+ "loss": 0.6328,
1206
+ "step": 17700
1207
+ },
1208
+ {
1209
+ "epoch": 0.66,
1210
+ "learning_rate": 0.000176825983287652,
1211
+ "loss": 0.6028,
1212
+ "step": 17800
1213
+ },
1214
+ {
1215
+ "epoch": 0.67,
1216
+ "learning_rate": 0.0001765754319759358,
1217
+ "loss": 0.6159,
1218
+ "step": 17900
1219
+ },
1220
+ {
1221
+ "epoch": 0.67,
1222
+ "learning_rate": 0.0001763237130371287,
1223
+ "loss": 0.6169,
1224
+ "step": 18000
1225
+ },
1226
+ {
1227
+ "epoch": 0.67,
1228
+ "eval_loss": 0.6444052457809448,
1229
+ "eval_runtime": 1304.3701,
1230
+ "eval_samples_per_second": 0.416,
1231
+ "eval_steps_per_second": 0.416,
1232
+ "step": 18000
1233
+ },
1234
+ {
1235
+ "epoch": 0.67,
1236
+ "learning_rate": 0.0001760708303094572,
1237
+ "loss": 0.6183,
1238
+ "step": 18100
1239
+ },
1240
+ {
1241
+ "epoch": 0.68,
1242
+ "learning_rate": 0.00017581678764889324,
1243
+ "loss": 0.6116,
1244
+ "step": 18200
1245
+ },
1246
+ {
1247
+ "epoch": 0.68,
1248
+ "learning_rate": 0.00017556158892909567,
1249
+ "loss": 0.6406,
1250
+ "step": 18300
1251
+ },
1252
+ {
1253
+ "epoch": 0.69,
1254
+ "learning_rate": 0.00017530523804135085,
1255
+ "loss": 0.6223,
1256
+ "step": 18400
1257
+ },
1258
+ {
1259
+ "epoch": 0.69,
1260
+ "learning_rate": 0.00017504773889451361,
1261
+ "loss": 0.628,
1262
+ "step": 18500
1263
+ },
1264
+ {
1265
+ "epoch": 0.69,
1266
+ "learning_rate": 0.00017478909541494736,
1267
+ "loss": 0.6173,
1268
+ "step": 18600
1269
+ },
1270
+ {
1271
+ "epoch": 0.7,
1272
+ "learning_rate": 0.00017452931154646444,
1273
+ "loss": 0.61,
1274
+ "step": 18700
1275
+ },
1276
+ {
1277
+ "epoch": 0.7,
1278
+ "learning_rate": 0.00017426839125026598,
1279
+ "loss": 0.5959,
1280
+ "step": 18800
1281
+ },
1282
+ {
1283
+ "epoch": 0.7,
1284
+ "learning_rate": 0.00017400633850488128,
1285
+ "loss": 0.5979,
1286
+ "step": 18900
1287
+ },
1288
+ {
1289
+ "epoch": 0.71,
1290
+ "learning_rate": 0.00017374315730610745,
1291
+ "loss": 0.6161,
1292
+ "step": 19000
1293
+ },
1294
+ {
1295
+ "epoch": 0.71,
1296
+ "eval_loss": 0.6378119587898254,
1297
+ "eval_runtime": 1283.5987,
1298
+ "eval_samples_per_second": 0.422,
1299
+ "eval_steps_per_second": 0.422,
1300
+ "step": 19000
1301
  }
1302
  ],
1303
  "max_steps": 80463,
1304
  "num_train_epochs": 3,
1305
+ "total_flos": 5.326473617405952e+18,
1306
  "trial_name": null,
1307
  "trial_params": null
1308
  }
{checkpoint-15000 → checkpoint-19000}/training_args.bin RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a6f52f1dc0e9f02b39b53daa1c87bbc62976c717096fc5d03aab7e139a51a837
3
  size 4027
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f8ca8c55b410908f1a6fb4d78d55fe6aad82bbca76ec8021e18981496f18fa70
3
  size 4027