fats-fme commited on
Commit
9a38830
1 Parent(s): 2b14f4e

Training in progress, step 187, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f4c3aa1b7c7e2131be00257b3a8ad8d8a5cc00d488de310e3aa2ab739b792e83
3
  size 50503544
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c109d0352421c1cb05b2ff3fc87f8bd43bf633d39b152a46ee64665256439a6d
3
  size 50503544
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:db6b4c4d50e731ccf97aba3d5220dc59dc330f0eba15762debc58c947b14c121
3
  size 101184122
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f79a32be0dd3059fb93b2da55666cfc3e419bf5b75845923fd42b704177228fc
3
  size 101184122
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e4b688646a1843cb3001738b5b25a88991e005c9c88cb1e32423e5d4a76cb0fc
3
  size 14512
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c72d6bf0f78b2357465d590b9ffc3c74333bf1690eaca071e21b5b7bee96dbb3
3
  size 14512
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:90a3f1a7b3324e65a1d3becf5bb547ac5dc1f1e4bf3ec0e53f905de1e26d2dee
3
  size 14512
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:89d6b36d890e4feb9f504b11799c7b132518b64e8b4e20ff987bf434d0dbe85d
3
  size 14512
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c9344f562fad06c4f4d31fd318ba7c558b48f7df5b7e58f8a207127dca92aacd
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3a5837b01af5a87d0f5eb6d6423726a5527c672ed560086af1a89b1f12d78ea5
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.7535070140280561,
5
  "eval_steps": 47,
6
- "global_step": 141,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1026,6 +1026,328 @@
1026
  "eval_samples_per_second": 19.75,
1027
  "eval_steps_per_second": 4.938,
1028
  "step": 141
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1029
  }
1030
  ],
1031
  "logging_steps": 1,
@@ -1040,12 +1362,12 @@
1040
  "should_evaluate": false,
1041
  "should_log": false,
1042
  "should_save": true,
1043
- "should_training_stop": false
1044
  },
1045
  "attributes": {}
1046
  }
1047
  },
1048
- "total_flos": 1.1611279748549837e+17,
1049
  "train_batch_size": 2,
1050
  "trial_name": null,
1051
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.9993319973279893,
5
  "eval_steps": 47,
6
+ "global_step": 187,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1026
  "eval_samples_per_second": 19.75,
1027
  "eval_steps_per_second": 4.938,
1028
  "step": 141
1029
+ },
1030
+ {
1031
+ "epoch": 0.7588510354041417,
1032
+ "grad_norm": 2.703543186187744,
1033
+ "learning_rate": 3.3741908964485414e-05,
1034
+ "loss": 1.3041,
1035
+ "step": 142
1036
+ },
1037
+ {
1038
+ "epoch": 0.7641950567802271,
1039
+ "grad_norm": 2.4582364559173584,
1040
+ "learning_rate": 3.234472017105313e-05,
1041
+ "loss": 1.5893,
1042
+ "step": 143
1043
+ },
1044
+ {
1045
+ "epoch": 0.7695390781563126,
1046
+ "grad_norm": 2.735029935836792,
1047
+ "learning_rate": 3.0971473116511394e-05,
1048
+ "loss": 1.1325,
1049
+ "step": 144
1050
+ },
1051
+ {
1052
+ "epoch": 0.7748830995323981,
1053
+ "grad_norm": 2.368544578552246,
1054
+ "learning_rate": 2.9622653763263874e-05,
1055
+ "loss": 1.3839,
1056
+ "step": 145
1057
+ },
1058
+ {
1059
+ "epoch": 0.7802271209084837,
1060
+ "grad_norm": 2.601339340209961,
1061
+ "learning_rate": 2.8298739429279707e-05,
1062
+ "loss": 2.6563,
1063
+ "step": 146
1064
+ },
1065
+ {
1066
+ "epoch": 0.7855711422845691,
1067
+ "grad_norm": 3.593987464904785,
1068
+ "learning_rate": 2.7000198619180794e-05,
1069
+ "loss": 3.1875,
1070
+ "step": 147
1071
+ },
1072
+ {
1073
+ "epoch": 0.7909151636606546,
1074
+ "grad_norm": 5.363809108734131,
1075
+ "learning_rate": 2.5727490858448288e-05,
1076
+ "loss": 3.8335,
1077
+ "step": 148
1078
+ },
1079
+ {
1080
+ "epoch": 0.7962591850367402,
1081
+ "grad_norm": 6.153328895568848,
1082
+ "learning_rate": 2.4481066530806395e-05,
1083
+ "loss": 3.3706,
1084
+ "step": 149
1085
+ },
1086
+ {
1087
+ "epoch": 0.8016032064128257,
1088
+ "grad_norm": 7.4249091148376465,
1089
+ "learning_rate": 2.3261366718841305e-05,
1090
+ "loss": 3.1387,
1091
+ "step": 150
1092
+ },
1093
+ {
1094
+ "epoch": 0.8069472277889111,
1095
+ "grad_norm": 1.7294468879699707,
1096
+ "learning_rate": 2.206882304791176e-05,
1097
+ "loss": 3.2262,
1098
+ "step": 151
1099
+ },
1100
+ {
1101
+ "epoch": 0.8122912491649966,
1102
+ "grad_norm": 2.171717643737793,
1103
+ "learning_rate": 2.0903857533405958e-05,
1104
+ "loss": 3.2502,
1105
+ "step": 152
1106
+ },
1107
+ {
1108
+ "epoch": 0.8176352705410822,
1109
+ "grad_norm": 1.9437127113342285,
1110
+ "learning_rate": 1.9766882431399646e-05,
1111
+ "loss": 3.0417,
1112
+ "step": 153
1113
+ },
1114
+ {
1115
+ "epoch": 0.8229792919171677,
1116
+ "grad_norm": 2.4127237796783447,
1117
+ "learning_rate": 1.8658300092767544e-05,
1118
+ "loss": 1.985,
1119
+ "step": 154
1120
+ },
1121
+ {
1122
+ "epoch": 0.8283233132932531,
1123
+ "grad_norm": 1.9797170162200928,
1124
+ "learning_rate": 1.7578502820800045e-05,
1125
+ "loss": 2.8997,
1126
+ "step": 155
1127
+ },
1128
+ {
1129
+ "epoch": 0.8336673346693386,
1130
+ "grad_norm": 1.9933407306671143,
1131
+ "learning_rate": 1.652787273237565e-05,
1132
+ "loss": 3.0551,
1133
+ "step": 156
1134
+ },
1135
+ {
1136
+ "epoch": 0.8390113560454242,
1137
+ "grad_norm": 2.6005380153656006,
1138
+ "learning_rate": 1.5506781622737942e-05,
1139
+ "loss": 3.1719,
1140
+ "step": 157
1141
+ },
1142
+ {
1143
+ "epoch": 0.8443553774215097,
1144
+ "grad_norm": 2.0730040073394775,
1145
+ "learning_rate": 1.4515590833925507e-05,
1146
+ "loss": 1.8285,
1147
+ "step": 158
1148
+ },
1149
+ {
1150
+ "epoch": 0.8496993987975952,
1151
+ "grad_norm": 1.7980684041976929,
1152
+ "learning_rate": 1.3554651126900564e-05,
1153
+ "loss": 2.2764,
1154
+ "step": 159
1155
+ },
1156
+ {
1157
+ "epoch": 0.8550434201736807,
1158
+ "grad_norm": 1.8802449703216553,
1159
+ "learning_rate": 1.2624302557422473e-05,
1160
+ "loss": 3.1112,
1161
+ "step": 160
1162
+ },
1163
+ {
1164
+ "epoch": 0.8603874415497662,
1165
+ "grad_norm": 2.0457687377929688,
1166
+ "learning_rate": 1.172487435570926e-05,
1167
+ "loss": 3.0587,
1168
+ "step": 161
1169
+ },
1170
+ {
1171
+ "epoch": 0.8657314629258517,
1172
+ "grad_norm": 1.9472899436950684,
1173
+ "learning_rate": 1.0856684809930151e-05,
1174
+ "loss": 2.3786,
1175
+ "step": 162
1176
+ },
1177
+ {
1178
+ "epoch": 0.8710754843019372,
1179
+ "grad_norm": 1.9495573043823242,
1180
+ "learning_rate": 1.0020041153570347e-05,
1181
+ "loss": 2.9965,
1182
+ "step": 163
1183
+ },
1184
+ {
1185
+ "epoch": 0.8764195056780227,
1186
+ "grad_norm": 2.6365323066711426,
1187
+ "learning_rate": 9.215239456707635e-06,
1188
+ "loss": 3.3418,
1189
+ "step": 164
1190
+ },
1191
+ {
1192
+ "epoch": 0.8817635270541082,
1193
+ "grad_norm": 2.1325201988220215,
1194
+ "learning_rate": 8.442564521239782e-06,
1195
+ "loss": 2.9538,
1196
+ "step": 165
1197
+ },
1198
+ {
1199
+ "epoch": 0.8871075484301937,
1200
+ "grad_norm": 2.537140369415283,
1201
+ "learning_rate": 7.70228978009907e-06,
1202
+ "loss": 1.329,
1203
+ "step": 166
1204
+ },
1205
+ {
1206
+ "epoch": 0.8924515698062793,
1207
+ "grad_norm": 2.8866047859191895,
1208
+ "learning_rate": 6.994677200490507e-06,
1209
+ "loss": 1.2163,
1210
+ "step": 167
1211
+ },
1212
+ {
1213
+ "epoch": 0.8977955911823647,
1214
+ "grad_norm": 2.537121295928955,
1215
+ "learning_rate": 6.319977191187232e-06,
1216
+ "loss": 1.4877,
1217
+ "step": 168
1218
+ },
1219
+ {
1220
+ "epoch": 0.9031396125584502,
1221
+ "grad_norm": 2.798138380050659,
1222
+ "learning_rate": 5.678428513916212e-06,
1223
+ "loss": 1.2401,
1224
+ "step": 169
1225
+ },
1226
+ {
1227
+ "epoch": 0.9084836339345357,
1228
+ "grad_norm": 1.8588306903839111,
1229
+ "learning_rate": 5.07025819886574e-06,
1230
+ "loss": 2.3853,
1231
+ "step": 170
1232
+ },
1233
+ {
1234
+ "epoch": 0.9138276553106213,
1235
+ "grad_norm": 3.1269595623016357,
1236
+ "learning_rate": 4.495681464344259e-06,
1237
+ "loss": 3.4073,
1238
+ "step": 171
1239
+ },
1240
+ {
1241
+ "epoch": 0.9191716766867067,
1242
+ "grad_norm": 3.1233201026916504,
1243
+ "learning_rate": 3.954901640619368e-06,
1244
+ "loss": 3.1646,
1245
+ "step": 172
1246
+ },
1247
+ {
1248
+ "epoch": 0.9245156980627922,
1249
+ "grad_norm": 4.395522117614746,
1250
+ "learning_rate": 3.4481100979635306e-06,
1251
+ "loss": 3.6045,
1252
+ "step": 173
1253
+ },
1254
+ {
1255
+ "epoch": 0.9298597194388778,
1256
+ "grad_norm": 5.26246452331543,
1257
+ "learning_rate": 2.9754861789324073e-06,
1258
+ "loss": 3.1748,
1259
+ "step": 174
1260
+ },
1261
+ {
1262
+ "epoch": 0.9352037408149633,
1263
+ "grad_norm": 7.157742977142334,
1264
+ "learning_rate": 2.537197134899294e-06,
1265
+ "loss": 3.3458,
1266
+ "step": 175
1267
+ },
1268
+ {
1269
+ "epoch": 0.9405477621910487,
1270
+ "grad_norm": 1.6674422025680542,
1271
+ "learning_rate": 2.1333980668685414e-06,
1272
+ "loss": 3.3945,
1273
+ "step": 176
1274
+ },
1275
+ {
1276
+ "epoch": 0.9458917835671342,
1277
+ "grad_norm": 1.6258163452148438,
1278
+ "learning_rate": 1.7642318705886286e-06,
1279
+ "loss": 2.398,
1280
+ "step": 177
1281
+ },
1282
+ {
1283
+ "epoch": 0.9512358049432198,
1284
+ "grad_norm": 1.771727442741394,
1285
+ "learning_rate": 1.4298291859845214e-06,
1286
+ "loss": 3.235,
1287
+ "step": 178
1288
+ },
1289
+ {
1290
+ "epoch": 0.9565798263193053,
1291
+ "grad_norm": 1.9831771850585938,
1292
+ "learning_rate": 1.1303083509269452e-06,
1293
+ "loss": 2.1936,
1294
+ "step": 179
1295
+ },
1296
+ {
1297
+ "epoch": 0.9619238476953907,
1298
+ "grad_norm": 2.070289373397827,
1299
+ "learning_rate": 8.657753593552143e-07,
1300
+ "loss": 2.3432,
1301
+ "step": 180
1302
+ },
1303
+ {
1304
+ "epoch": 0.9672678690714763,
1305
+ "grad_norm": 1.823572039604187,
1306
+ "learning_rate": 6.363238237683033e-07,
1307
+ "loss": 2.6982,
1308
+ "step": 181
1309
+ },
1310
+ {
1311
+ "epoch": 0.9726118904475618,
1312
+ "grad_norm": 1.9612380266189575,
1313
+ "learning_rate": 4.4203494209733576e-07,
1314
+ "loss": 2.9414,
1315
+ "step": 182
1316
+ },
1317
+ {
1318
+ "epoch": 0.9779559118236473,
1319
+ "grad_norm": 2.1800384521484375,
1320
+ "learning_rate": 2.8297746897146816e-07,
1321
+ "loss": 1.5728,
1322
+ "step": 183
1323
+ },
1324
+ {
1325
+ "epoch": 0.9832999331997327,
1326
+ "grad_norm": 2.164794921875,
1327
+ "learning_rate": 1.5920769138706438e-07,
1328
+ "loss": 1.5844,
1329
+ "step": 184
1330
+ },
1331
+ {
1332
+ "epoch": 0.9886439545758183,
1333
+ "grad_norm": 2.399670362472534,
1334
+ "learning_rate": 7.07694087889621e-08,
1335
+ "loss": 2.907,
1336
+ "step": 185
1337
+ },
1338
+ {
1339
+ "epoch": 0.9939879759519038,
1340
+ "grad_norm": 3.2542049884796143,
1341
+ "learning_rate": 1.7693917570837936e-08,
1342
+ "loss": 3.2396,
1343
+ "step": 186
1344
+ },
1345
+ {
1346
+ "epoch": 0.9993319973279893,
1347
+ "grad_norm": 6.379725933074951,
1348
+ "learning_rate": 0.0,
1349
+ "loss": 3.212,
1350
+ "step": 187
1351
  }
1352
  ],
1353
  "logging_steps": 1,
 
1362
  "should_evaluate": false,
1363
  "should_log": false,
1364
  "should_save": true,
1365
+ "should_training_stop": true
1366
  },
1367
  "attributes": {}
1368
  }
1369
  },
1370
+ "total_flos": 1.5399356829637018e+17,
1371
  "train_batch_size": 2,
1372
  "trial_name": null,
1373
  "trial_params": null