ZeroUniqueness committed on
Commit
faadcb1
•
1 Parent(s): 2c0a422

Training in progress, step 22000

Browse files
adapter_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7f715fb1f731563489cd7a96170934d1a2704e0ffdab19dbb6afc7d46ea57e62
3
  size 500897101
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8d90531691a4bd22a7fc951651aaad0da90ed5d3154cec0eed731f4f8c1da5c1
3
  size 500897101
{checkpoint-18000 → checkpoint-21000/adapter_model}/README.md RENAMED
File without changes
{checkpoint-18000 → checkpoint-21000/adapter_model}/adapter_config.json RENAMED
File without changes
{checkpoint-18000 → checkpoint-21000/adapter_model}/adapter_model.bin RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:71bf449473329db623c9e20b261816375a55b691f04f413c33c16578f715c541
3
  size 500897101
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7f715fb1f731563489cd7a96170934d1a2704e0ffdab19dbb6afc7d46ea57e62
3
  size 500897101
{checkpoint-18000/adapter_model → checkpoint-22000}/README.md RENAMED
File without changes
{checkpoint-18000/adapter_model → checkpoint-22000}/adapter_config.json RENAMED
File without changes
{checkpoint-18000/adapter_model → checkpoint-22000}/adapter_model.bin RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:71bf449473329db623c9e20b261816375a55b691f04f413c33c16578f715c541
3
  size 500897101
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8d90531691a4bd22a7fc951651aaad0da90ed5d3154cec0eed731f4f8c1da5c1
3
  size 500897101
{checkpoint-18000 → checkpoint-22000}/optimizer.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:26df0d8c29c8387b0078001ba77499ae2c7e3c93bf63ba77b6e9f6e5d426a377
3
  size 1001723453
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:32d1d8e706f824a607393e4770716b98e3e14380a36b3d7dd013d1899e28d004
3
  size 1001723453
{checkpoint-18000 → checkpoint-22000}/rng_state.pth RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3d53dc571c7357590eb279d921d2c025244bb82d035e40e44f08a12815eec53c
3
  size 14575
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b542bc6d6985311dc1f25608b1f96d66e1395f22a48f78d5c5c683d2d8fdde5b
3
  size 14575
{checkpoint-18000 → checkpoint-22000}/scheduler.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b78dacfd488ca08dbc1fe1369dc6869b6315793fbe6735fa309357590127c966
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e1276f4059dba941483dcef8d87faad5b272a9ed0c06d8c247d04b1898961557
3
  size 627
{checkpoint-18000 → checkpoint-22000}/trainer_state.json RENAMED
@@ -1,8 +1,8 @@
1
  {
2
- "best_metric": 0.6444052457809448,
3
- "best_model_checkpoint": "./qlora-out/checkpoint-18000",
4
- "epoch": 0.6711159166324895,
5
- "global_step": 18000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -1230,11 +1230,283 @@
1230
  "eval_samples_per_second": 0.416,
1231
  "eval_steps_per_second": 0.416,
1232
  "step": 18000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1233
  }
1234
  ],
1235
  "max_steps": 80463,
1236
  "num_train_epochs": 3,
1237
- "total_flos": 5.045441572600873e+18,
1238
  "trial_name": null,
1239
  "trial_params": null
1240
  }
 
1
  {
2
+ "best_metric": 0.6218891143798828,
3
+ "best_model_checkpoint": "./qlora-out/checkpoint-22000",
4
+ "epoch": 0.8202527869952649,
5
+ "global_step": 22000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
1230
  "eval_samples_per_second": 0.416,
1231
  "eval_steps_per_second": 0.416,
1232
  "step": 18000
1233
+ },
1234
+ {
1235
+ "epoch": 0.67,
1236
+ "learning_rate": 0.0001760708303094572,
1237
+ "loss": 0.6183,
1238
+ "step": 18100
1239
+ },
1240
+ {
1241
+ "epoch": 0.68,
1242
+ "learning_rate": 0.00017581678764889324,
1243
+ "loss": 0.6116,
1244
+ "step": 18200
1245
+ },
1246
+ {
1247
+ "epoch": 0.68,
1248
+ "learning_rate": 0.00017556158892909567,
1249
+ "loss": 0.6406,
1250
+ "step": 18300
1251
+ },
1252
+ {
1253
+ "epoch": 0.69,
1254
+ "learning_rate": 0.00017530523804135085,
1255
+ "loss": 0.6223,
1256
+ "step": 18400
1257
+ },
1258
+ {
1259
+ "epoch": 0.69,
1260
+ "learning_rate": 0.00017504773889451361,
1261
+ "loss": 0.628,
1262
+ "step": 18500
1263
+ },
1264
+ {
1265
+ "epoch": 0.69,
1266
+ "learning_rate": 0.00017478909541494736,
1267
+ "loss": 0.6173,
1268
+ "step": 18600
1269
+ },
1270
+ {
1271
+ "epoch": 0.7,
1272
+ "learning_rate": 0.00017452931154646444,
1273
+ "loss": 0.61,
1274
+ "step": 18700
1275
+ },
1276
+ {
1277
+ "epoch": 0.7,
1278
+ "learning_rate": 0.00017426839125026598,
1279
+ "loss": 0.5959,
1280
+ "step": 18800
1281
+ },
1282
+ {
1283
+ "epoch": 0.7,
1284
+ "learning_rate": 0.00017400633850488128,
1285
+ "loss": 0.5979,
1286
+ "step": 18900
1287
+ },
1288
+ {
1289
+ "epoch": 0.71,
1290
+ "learning_rate": 0.00017374315730610745,
1291
+ "loss": 0.6161,
1292
+ "step": 19000
1293
+ },
1294
+ {
1295
+ "epoch": 0.71,
1296
+ "eval_loss": 0.6378119587898254,
1297
+ "eval_runtime": 1283.5987,
1298
+ "eval_samples_per_second": 0.422,
1299
+ "eval_steps_per_second": 0.422,
1300
+ "step": 19000
1301
+ },
1302
+ {
1303
+ "epoch": 0.71,
1304
+ "learning_rate": 0.00017347885166694825,
1305
+ "loss": 0.6213,
1306
+ "step": 19100
1307
+ },
1308
+ {
1309
+ "epoch": 0.72,
1310
+ "learning_rate": 0.00017321342561755297,
1311
+ "loss": 0.6217,
1312
+ "step": 19200
1313
+ },
1314
+ {
1315
+ "epoch": 0.72,
1316
+ "learning_rate": 0.00017294688320515506,
1317
+ "loss": 0.6127,
1318
+ "step": 19300
1319
+ },
1320
+ {
1321
+ "epoch": 0.72,
1322
+ "learning_rate": 0.00017267922849401024,
1323
+ "loss": 0.6145,
1324
+ "step": 19400
1325
+ },
1326
+ {
1327
+ "epoch": 0.73,
1328
+ "learning_rate": 0.00017241046556533472,
1329
+ "loss": 0.5936,
1330
+ "step": 19500
1331
+ },
1332
+ {
1333
+ "epoch": 0.73,
1334
+ "learning_rate": 0.0001721405985172428,
1335
+ "loss": 0.6273,
1336
+ "step": 19600
1337
+ },
1338
+ {
1339
+ "epoch": 0.73,
1340
+ "learning_rate": 0.0001718696314646846,
1341
+ "loss": 0.6059,
1342
+ "step": 19700
1343
+ },
1344
+ {
1345
+ "epoch": 0.74,
1346
+ "learning_rate": 0.000171597568539383,
1347
+ "loss": 0.5934,
1348
+ "step": 19800
1349
+ },
1350
+ {
1351
+ "epoch": 0.74,
1352
+ "learning_rate": 0.000171324413889771,
1353
+ "loss": 0.6243,
1354
+ "step": 19900
1355
+ },
1356
+ {
1357
+ "epoch": 0.75,
1358
+ "learning_rate": 0.00017105017168092808,
1359
+ "loss": 0.6164,
1360
+ "step": 20000
1361
+ },
1362
+ {
1363
+ "epoch": 0.75,
1364
+ "eval_loss": 0.6324757933616638,
1365
+ "eval_runtime": 1266.6769,
1366
+ "eval_samples_per_second": 0.428,
1367
+ "eval_steps_per_second": 0.428,
1368
+ "step": 20000
1369
+ },
1370
+ {
1371
+ "epoch": 0.75,
1372
+ "learning_rate": 0.0001707748460945171,
1373
+ "loss": 0.5953,
1374
+ "step": 20100
1375
+ },
1376
+ {
1377
+ "epoch": 0.75,
1378
+ "learning_rate": 0.0001704984413287202,
1379
+ "loss": 0.6329,
1380
+ "step": 20200
1381
+ },
1382
+ {
1383
+ "epoch": 0.76,
1384
+ "learning_rate": 0.00017022096159817493,
1385
+ "loss": 0.6227,
1386
+ "step": 20300
1387
+ },
1388
+ {
1389
+ "epoch": 0.76,
1390
+ "learning_rate": 0.00016994241113391003,
1391
+ "loss": 0.6022,
1392
+ "step": 20400
1393
+ },
1394
+ {
1395
+ "epoch": 0.76,
1396
+ "learning_rate": 0.0001696627941832808,
1397
+ "loss": 0.604,
1398
+ "step": 20500
1399
+ },
1400
+ {
1401
+ "epoch": 0.77,
1402
+ "learning_rate": 0.0001693821150099044,
1403
+ "loss": 0.6101,
1404
+ "step": 20600
1405
+ },
1406
+ {
1407
+ "epoch": 0.77,
1408
+ "learning_rate": 0.00016910037789359485,
1409
+ "loss": 0.6242,
1410
+ "step": 20700
1411
+ },
1412
+ {
1413
+ "epoch": 0.78,
1414
+ "learning_rate": 0.00016881758713029776,
1415
+ "loss": 0.6096,
1416
+ "step": 20800
1417
+ },
1418
+ {
1419
+ "epoch": 0.78,
1420
+ "learning_rate": 0.0001685337470320248,
1421
+ "loss": 0.5948,
1422
+ "step": 20900
1423
+ },
1424
+ {
1425
+ "epoch": 0.78,
1426
+ "learning_rate": 0.0001682488619267879,
1427
+ "loss": 0.5911,
1428
+ "step": 21000
1429
+ },
1430
+ {
1431
+ "epoch": 0.78,
1432
+ "eval_loss": 0.6282580494880676,
1433
+ "eval_runtime": 1313.1215,
1434
+ "eval_samples_per_second": 0.413,
1435
+ "eval_steps_per_second": 0.413,
1436
+ "step": 21000
1437
+ },
1438
+ {
1439
+ "epoch": 0.79,
1440
+ "learning_rate": 0.0001679629361585335,
1441
+ "loss": 0.5716,
1442
+ "step": 21100
1443
+ },
1444
+ {
1445
+ "epoch": 0.79,
1446
+ "learning_rate": 0.00016767597408707594,
1447
+ "loss": 0.5957,
1448
+ "step": 21200
1449
+ },
1450
+ {
1451
+ "epoch": 0.79,
1452
+ "learning_rate": 0.00016738798008803128,
1453
+ "loss": 0.6308,
1454
+ "step": 21300
1455
+ },
1456
+ {
1457
+ "epoch": 0.8,
1458
+ "learning_rate": 0.00016709895855275048,
1459
+ "loss": 0.5891,
1460
+ "step": 21400
1461
+ },
1462
+ {
1463
+ "epoch": 0.8,
1464
+ "learning_rate": 0.00016680891388825243,
1465
+ "loss": 0.6104,
1466
+ "step": 21500
1467
+ },
1468
+ {
1469
+ "epoch": 0.81,
1470
+ "learning_rate": 0.00016651785051715674,
1471
+ "loss": 0.6344,
1472
+ "step": 21600
1473
+ },
1474
+ {
1475
+ "epoch": 0.81,
1476
+ "learning_rate": 0.0001662257728776163,
1477
+ "loss": 0.604,
1478
+ "step": 21700
1479
+ },
1480
+ {
1481
+ "epoch": 0.81,
1482
+ "learning_rate": 0.0001659326854232497,
1483
+ "loss": 0.6066,
1484
+ "step": 21800
1485
+ },
1486
+ {
1487
+ "epoch": 0.82,
1488
+ "learning_rate": 0.0001656385926230732,
1489
+ "loss": 0.6324,
1490
+ "step": 21900
1491
+ },
1492
+ {
1493
+ "epoch": 0.82,
1494
+ "learning_rate": 0.00016534349896143264,
1495
+ "loss": 0.5819,
1496
+ "step": 22000
1497
+ },
1498
+ {
1499
+ "epoch": 0.82,
1500
+ "eval_loss": 0.6218891143798828,
1501
+ "eval_runtime": 1296.6038,
1502
+ "eval_samples_per_second": 0.418,
1503
+ "eval_steps_per_second": 0.418,
1504
+ "step": 22000
1505
  }
1506
  ],
1507
  "max_steps": 80463,
1508
  "num_train_epochs": 3,
1509
+ "total_flos": 6.16734700814082e+18,
1510
  "trial_name": null,
1511
  "trial_params": null
1512
  }
{checkpoint-18000 → checkpoint-22000}/training_args.bin RENAMED
File without changes