EColi committed on
Commit
b75fedd
1 Parent(s): efd1b11
Files changed (5)
  1. README.md +68 -0
  2. all_results.json +8 -0
  3. pytorch_model.bin +1 -1
  4. train_results.json +8 -0
  5. trainer_state.json +398 -3
README.md ADDED
@@ -0,0 +1,68 @@
+ ---
+ tags:
+ - generated_from_trainer
+ model-index:
+ - name: out
+   results: []
+ ---
+
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
+ should probably proofread and complete it, then remove this comment. -->
+
+ # out
+
+ This model is a fine-tuned version of [/1TB_SSD/SB_AI/out_orig2](https://huggingface.co//1TB_SSD/SB_AI/out_orig2) on an unknown dataset.
+ It achieves the following results on the evaluation set:
+ - Loss: 0.0619
+
+ ## Model description
+
+ More information needed
+
+ ## Intended uses & limitations
+
+ More information needed
+
+ ## Training and evaluation data
+
+ More information needed
+
+ ## Training procedure
+
+ ### Training hyperparameters
+
+ The following hyperparameters were used during training:
+ - learning_rate: 5e-05
+ - train_batch_size: 1
+ - eval_batch_size: 1
+ - seed: 3784447887
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
+ - lr_scheduler_type: linear
+ - num_epochs: 1
+
+ ### Training results
+
+ | Training Loss | Epoch | Step    | Validation Loss |
+ |:-------------:|:-----:|:-------:|:---------------:|
+ | 0.0867        | 0.07  | 75000   | 0.0742          |
+ | 0.0783        | 0.13  | 150000  | 0.0695          |
+ | 0.0719        | 0.2   | 225000  | 0.0732          |
+ | 0.0743        | 0.27  | 300000  | 0.0663          |
+ | 0.0659        | 0.34  | 375000  | 0.0686          |
+ | 0.0664        | 0.4   | 450000  | 0.0683          |
+ | 0.0637        | 0.47  | 525000  | 0.0680          |
+ | 0.0655        | 0.54  | 600000  | 0.0641          |
+ | 0.0676        | 0.6   | 675000  | 0.0644          |
+ | 0.0704        | 0.67  | 750000  | 0.0645          |
+ | 0.0687        | 0.74  | 825000  | 0.0610          |
+ | 0.059         | 0.81  | 900000  | 0.0652          |
+ | 0.0666        | 0.87  | 975000  | 0.0619          |
+ | 0.0624        | 0.94  | 1050000 | 0.0619          |
+
+
+ ### Framework versions
+
+ - Transformers 4.15.0
+ - Pytorch 1.10.1+cu113
+ - Datasets 1.17.0
+ - Tokenizers 0.10.3
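For readers who want to reproduce the setup, the hyperparameters listed in the README above map onto `transformers.TrainingArguments` roughly as follows. This is a minimal, illustrative sketch, not the original training script: the output directory is a placeholder, and the evaluation schedule (every 75,000 steps) is inferred from the results table rather than stated in the card.

```python
# Illustrative sketch only: maps the card's listed hyperparameters onto
# transformers.TrainingArguments (Transformers 4.15.0 API). output_dir and the
# eval schedule are assumptions; the dataset and model are not shown here.
from transformers import TrainingArguments

training_args = TrainingArguments(
    output_dir="out",                 # placeholder
    learning_rate=5e-5,
    per_device_train_batch_size=1,
    per_device_eval_batch_size=1,
    seed=3784447887,
    adam_beta1=0.9,
    adam_beta2=0.999,
    adam_epsilon=1e-8,
    lr_scheduler_type="linear",
    num_train_epochs=1,
    evaluation_strategy="steps",      # assumed: eval every 75,000 steps,
    eval_steps=75_000,                # matching the results table above
)
```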
all_results.json ADDED
@@ -0,0 +1,8 @@
+ {
+ "epoch": 1.0,
+ "train_loss": 0.0695511792498969,
+ "train_runtime": 176193.0564,
+ "train_samples": 1116594,
+ "train_samples_per_second": 6.337,
+ "train_steps_per_second": 6.337
+ }
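As a quick sanity check of the reported throughput (an editor's illustration, not part of the commit): dividing `train_samples` by `train_runtime` reproduces `train_samples_per_second`, and with a train batch size of 1 the per-step and per-sample rates coincide.

```python
# Consistency check of the throughput fields reported above (illustrative only).
train_samples = 1_116_594
train_runtime = 176_193.0564           # seconds, roughly 48.9 hours

samples_per_second = train_samples / train_runtime
print(round(samples_per_second, 3))    # 6.337, matching train_samples_per_second

# With train_batch_size = 1, each optimizer step consumes one sample,
# so train_steps_per_second equals train_samples_per_second.
```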
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:c5c1b451d6694625a36d46089d5fa956d347f8d1ddb5df73a3d4f6666292c5ab
+ oid sha256:3fb22fb40259e3ef7d648c85bc99a714855c5d5d75c32dd548bebf38df101aea
  size 891703231
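The `oid sha256:` lines are Git LFS pointer entries, so this change swaps the tracked weights for a new blob of the same size. A minimal sketch of how one could verify a downloaded `pytorch_model.bin` against the new pointer hash (the local file path is a placeholder):

```python
# Illustrative sketch: check a locally downloaded pytorch_model.bin against the
# sha256 recorded in the Git LFS pointer shown above.
import hashlib

EXPECTED = "3fb22fb40259e3ef7d648c85bc99a714855c5d5d75c32dd548bebf38df101aea"

def sha256_of(path: str, chunk_size: int = 1 << 20) -> str:
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            h.update(chunk)
    return h.hexdigest()

assert sha256_of("pytorch_model.bin") == EXPECTED, "checksum mismatch"
```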
train_results.json ADDED
@@ -0,0 +1,8 @@
+ {
+ "epoch": 1.0,
+ "train_loss": 0.0695511792498969,
+ "train_runtime": 176193.0564,
+ "train_samples": 1116594,
+ "train_samples_per_second": 6.337,
+ "train_steps_per_second": 6.337
+ }
trainer_state.json CHANGED
@@ -1,8 +1,8 @@
  {
  "best_metric": null,
  "best_model_checkpoint": null,
- "epoch": 0.7343761474627304,
- "global_step": 820000,
+ "epoch": 1.0,
+ "global_step": 1116594,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -1070,11 +1070,406 @@
  "learning_rate": 1.328119262686348e-05,
  "loss": 0.0676,
  "step": 820000
+ },
+ {
+ "epoch": 0.74,
+ "learning_rate": 1.305729745995411e-05,
+ "loss": 0.0687,
+ "step": 825000
+ },
+ {
+ "epoch": 0.74,
+ "eval_loss": 0.061014574021101,
+ "eval_runtime": 1687.4471,
+ "eval_samples_per_second": 36.761,
+ "eval_steps_per_second": 36.761,
+ "step": 825000
+ },
+ {
+ "epoch": 0.74,
+ "learning_rate": 1.2833402293044742e-05,
+ "loss": 0.0679,
+ "step": 830000
+ },
+ {
+ "epoch": 0.75,
+ "learning_rate": 1.2609507126135372e-05,
+ "loss": 0.0683,
+ "step": 835000
+ },
+ {
+ "epoch": 0.75,
+ "learning_rate": 1.2385611959226004e-05,
+ "loss": 0.0702,
+ "step": 840000
+ },
+ {
+ "epoch": 0.76,
+ "learning_rate": 1.2161716792316636e-05,
+ "loss": 0.0652,
+ "step": 845000
+ },
+ {
+ "epoch": 0.76,
+ "learning_rate": 1.1937821625407265e-05,
+ "loss": 0.0636,
+ "step": 850000
+ },
+ {
+ "epoch": 0.77,
+ "learning_rate": 1.1713926458497897e-05,
+ "loss": 0.0637,
+ "step": 855000
+ },
+ {
+ "epoch": 0.77,
+ "learning_rate": 1.1490031291588529e-05,
+ "loss": 0.0628,
+ "step": 860000
+ },
+ {
+ "epoch": 0.77,
+ "learning_rate": 1.126613612467916e-05,
+ "loss": 0.0701,
+ "step": 865000
+ },
+ {
+ "epoch": 0.78,
+ "learning_rate": 1.104224095776979e-05,
+ "loss": 0.0663,
+ "step": 870000
+ },
+ {
+ "epoch": 0.78,
+ "learning_rate": 1.081834579086042e-05,
+ "loss": 0.0686,
+ "step": 875000
+ },
+ {
+ "epoch": 0.79,
+ "learning_rate": 1.0594450623951052e-05,
+ "loss": 0.0655,
+ "step": 880000
+ },
+ {
+ "epoch": 0.79,
+ "learning_rate": 1.0370555457041682e-05,
+ "loss": 0.0622,
+ "step": 885000
+ },
+ {
+ "epoch": 0.8,
+ "learning_rate": 1.0146660290132313e-05,
+ "loss": 0.0585,
+ "step": 890000
+ },
+ {
+ "epoch": 0.8,
+ "learning_rate": 9.922765123222945e-06,
+ "loss": 0.0658,
+ "step": 895000
+ },
+ {
+ "epoch": 0.81,
+ "learning_rate": 9.698869956313575e-06,
+ "loss": 0.059,
+ "step": 900000
+ },
+ {
+ "epoch": 0.81,
+ "eval_loss": 0.06522925943136215,
+ "eval_runtime": 1686.0104,
+ "eval_samples_per_second": 36.793,
+ "eval_steps_per_second": 36.793,
+ "step": 900000
+ },
+ {
+ "epoch": 0.81,
+ "learning_rate": 9.474974789404206e-06,
+ "loss": 0.0624,
+ "step": 905000
+ },
+ {
+ "epoch": 0.81,
+ "learning_rate": 9.251079622494838e-06,
+ "loss": 0.0722,
+ "step": 910000
+ },
+ {
+ "epoch": 0.82,
+ "learning_rate": 9.02718445558547e-06,
+ "loss": 0.0599,
+ "step": 915000
+ },
+ {
+ "epoch": 0.82,
+ "learning_rate": 8.803289288676098e-06,
+ "loss": 0.0699,
+ "step": 920000
+ },
+ {
+ "epoch": 0.83,
+ "learning_rate": 8.57939412176673e-06,
+ "loss": 0.0606,
+ "step": 925000
+ },
+ {
+ "epoch": 0.83,
+ "learning_rate": 8.355498954857361e-06,
+ "loss": 0.0696,
+ "step": 930000
+ },
+ {
+ "epoch": 0.84,
+ "learning_rate": 8.131603787947991e-06,
+ "loss": 0.0626,
+ "step": 935000
+ },
+ {
+ "epoch": 0.84,
+ "learning_rate": 7.907708621038623e-06,
+ "loss": 0.0658,
+ "step": 940000
+ },
+ {
+ "epoch": 0.85,
+ "learning_rate": 7.683813454129254e-06,
+ "loss": 0.0641,
+ "step": 945000
+ },
+ {
+ "epoch": 0.85,
+ "learning_rate": 7.459918287219884e-06,
+ "loss": 0.0659,
+ "step": 950000
+ },
+ {
+ "epoch": 0.86,
+ "learning_rate": 7.236023120310516e-06,
+ "loss": 0.062,
+ "step": 955000
+ },
+ {
+ "epoch": 0.86,
+ "learning_rate": 7.012127953401147e-06,
+ "loss": 0.0658,
+ "step": 960000
+ },
+ {
+ "epoch": 0.86,
+ "learning_rate": 6.788232786491779e-06,
+ "loss": 0.0624,
+ "step": 965000
+ },
+ {
+ "epoch": 0.87,
+ "learning_rate": 6.564337619582408e-06,
+ "loss": 0.0619,
+ "step": 970000
+ },
+ {
+ "epoch": 0.87,
+ "learning_rate": 6.34044245267304e-06,
+ "loss": 0.0666,
+ "step": 975000
+ },
+ {
+ "epoch": 0.87,
+ "eval_loss": 0.061893004924058914,
+ "eval_runtime": 1682.9916,
+ "eval_samples_per_second": 36.859,
+ "eval_steps_per_second": 36.859,
+ "step": 975000
+ },
+ {
+ "epoch": 0.88,
+ "learning_rate": 6.116547285763671e-06,
+ "loss": 0.0665,
+ "step": 980000
+ },
+ {
+ "epoch": 0.88,
+ "learning_rate": 5.892652118854302e-06,
+ "loss": 0.0687,
+ "step": 985000
+ },
+ {
+ "epoch": 0.89,
+ "learning_rate": 5.668756951944933e-06,
+ "loss": 0.0656,
+ "step": 990000
+ },
+ {
+ "epoch": 0.89,
+ "learning_rate": 5.444861785035564e-06,
+ "loss": 0.0678,
+ "step": 995000
+ },
+ {
+ "epoch": 0.9,
+ "learning_rate": 5.220966618126195e-06,
+ "loss": 0.0645,
+ "step": 1000000
+ },
+ {
+ "epoch": 0.9,
+ "learning_rate": 4.997071451216825e-06,
+ "loss": 0.0661,
+ "step": 1005000
+ },
+ {
+ "epoch": 0.9,
+ "learning_rate": 4.773176284307457e-06,
+ "loss": 0.0672,
+ "step": 1010000
+ },
+ {
+ "epoch": 0.91,
+ "learning_rate": 4.5492811173980875e-06,
+ "loss": 0.0618,
+ "step": 1015000
+ },
+ {
+ "epoch": 0.91,
+ "learning_rate": 4.325385950488719e-06,
+ "loss": 0.064,
+ "step": 1020000
+ },
+ {
+ "epoch": 0.92,
+ "learning_rate": 4.101490783579349e-06,
+ "loss": 0.0622,
+ "step": 1025000
+ },
+ {
+ "epoch": 0.92,
+ "learning_rate": 3.87759561666998e-06,
+ "loss": 0.0673,
+ "step": 1030000
+ },
+ {
+ "epoch": 0.93,
+ "learning_rate": 3.6537004497606115e-06,
+ "loss": 0.0643,
+ "step": 1035000
+ },
+ {
+ "epoch": 0.93,
+ "learning_rate": 3.4298052828512423e-06,
+ "loss": 0.0688,
+ "step": 1040000
+ },
+ {
+ "epoch": 0.94,
+ "learning_rate": 3.2059101159418736e-06,
+ "loss": 0.0642,
+ "step": 1045000
+ },
+ {
+ "epoch": 0.94,
+ "learning_rate": 2.9820149490325044e-06,
+ "loss": 0.0624,
+ "step": 1050000
+ },
+ {
+ "epoch": 0.94,
+ "eval_loss": 0.061922721564769745,
+ "eval_runtime": 1682.5086,
+ "eval_samples_per_second": 36.869,
+ "eval_steps_per_second": 36.869,
+ "step": 1050000
+ },
+ {
+ "epoch": 0.94,
+ "learning_rate": 2.758119782123135e-06,
+ "loss": 0.0636,
+ "step": 1055000
+ },
+ {
+ "epoch": 0.95,
+ "learning_rate": 2.5342246152137664e-06,
+ "loss": 0.0636,
+ "step": 1060000
+ },
+ {
+ "epoch": 0.95,
+ "learning_rate": 2.310329448304397e-06,
+ "loss": 0.0669,
+ "step": 1065000
+ },
+ {
+ "epoch": 0.96,
+ "learning_rate": 2.086434281395028e-06,
+ "loss": 0.0608,
+ "step": 1070000
+ },
+ {
+ "epoch": 0.96,
+ "learning_rate": 1.862539114485659e-06,
+ "loss": 0.0604,
+ "step": 1075000
+ },
+ {
+ "epoch": 0.97,
+ "learning_rate": 1.63864394757629e-06,
+ "loss": 0.0656,
+ "step": 1080000
+ },
+ {
+ "epoch": 0.97,
+ "learning_rate": 1.4147487806669212e-06,
+ "loss": 0.0693,
+ "step": 1085000
+ },
+ {
+ "epoch": 0.98,
+ "learning_rate": 1.190853613757552e-06,
+ "loss": 0.0604,
+ "step": 1090000
+ },
+ {
+ "epoch": 0.98,
+ "learning_rate": 9.66958446848183e-07,
+ "loss": 0.0665,
+ "step": 1095000
+ },
+ {
+ "epoch": 0.99,
+ "learning_rate": 7.43063279938814e-07,
+ "loss": 0.0642,
+ "step": 1100000
+ },
+ {
+ "epoch": 0.99,
+ "learning_rate": 5.191681130294449e-07,
+ "loss": 0.067,
+ "step": 1105000
+ },
+ {
+ "epoch": 0.99,
+ "learning_rate": 2.952729461200759e-07,
+ "loss": 0.0638,
+ "step": 1110000
+ },
+ {
+ "epoch": 1.0,
+ "learning_rate": 7.137777921070686e-08,
+ "loss": 0.0596,
+ "step": 1115000
+ },
+ {
+ "epoch": 1.0,
+ "step": 1116594,
+ "total_flos": 3.3533589180916224e+17,
+ "train_loss": 0.0695511792498969,
+ "train_runtime": 176193.0564,
+ "train_samples_per_second": 6.337,
+ "train_steps_per_second": 6.337
  }
  ],
  "max_steps": 1116594,
  "num_train_epochs": 1,
- "total_flos": 2.4646180007983104e+17,
+ "total_flos": 3.3533589180916224e+17,
  "trial_name": null,
  "trial_params": null
  }
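The added `log_history` entries above interleave training losses with periodic evaluations. A minimal sketch for pulling the evaluation curve back out of a locally downloaded `trainer_state.json` (the file whose diff is shown above; the path is a placeholder):

```python
# Illustrative sketch: extract (step, eval_loss) pairs from trainer_state.json.
import json

with open("trainer_state.json") as f:
    state = json.load(f)

eval_points = [
    (entry["step"], entry["eval_loss"])
    for entry in state["log_history"]
    if "eval_loss" in entry          # evaluation entries carry eval_* keys
]

for step, loss in eval_points:
    print(f"step {step:>9}: eval_loss {loss:.4f}")
```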