ZeroUniqueness committed on
Commit
c59800f
•
1 Parent(s): faadcb1

Training in progress, step 23000

Browse files
adapter_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8d90531691a4bd22a7fc951651aaad0da90ed5d3154cec0eed731f4f8c1da5c1
3
  size 500897101
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b09fbcc84f52583b6b795b011e1c5f0c43a3db60e66b9f20591fcc7e4f4c5a5c
3
  size 500897101
{checkpoint-19000 → checkpoint-22000/adapter_model}/README.md RENAMED
File without changes
{checkpoint-19000 → checkpoint-22000/adapter_model}/adapter_config.json RENAMED
File without changes
{checkpoint-19000 → checkpoint-22000/adapter_model}/adapter_model.bin RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bb683e1ef26fb6759ee6f8f26fd71fa321318d9618b1721b67182a9ba22c4bed
3
  size 500897101
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8d90531691a4bd22a7fc951651aaad0da90ed5d3154cec0eed731f4f8c1da5c1
3
  size 500897101
{checkpoint-19000/adapter_model → checkpoint-23000}/README.md RENAMED
File without changes
{checkpoint-19000/adapter_model → checkpoint-23000}/adapter_config.json RENAMED
File without changes
{checkpoint-19000/adapter_model → checkpoint-23000}/adapter_model.bin RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bb683e1ef26fb6759ee6f8f26fd71fa321318d9618b1721b67182a9ba22c4bed
3
  size 500897101
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b09fbcc84f52583b6b795b011e1c5f0c43a3db60e66b9f20591fcc7e4f4c5a5c
3
  size 500897101
{checkpoint-19000 → checkpoint-23000}/optimizer.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f9516e9c48e0b063f0894fa36644ed81a9950c2ccb238710e075ac900e1c691a
3
  size 1001723453
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cbfed6747bc76cf76b9beff591f4900ea3c29204b46ce6523ace5f8336472f0d
3
  size 1001723453
{checkpoint-19000 → checkpoint-23000}/rng_state.pth RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fe0caf0c3a09c83e2a6569bdf628a23d4ab8e1894b17df4aac29861e99504483
3
  size 14575
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ed2325c64a2e205df19d31c415cd90ef5d3f44b07c5241eed503aa4caf9c6bce
3
  size 14575
{checkpoint-19000 → checkpoint-23000}/scheduler.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5ebd653f1bd02557ed5a069aef9f43482462db6edb223607dfd50441b1ab368a
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:43a0198723866985e385225cfff961adcdcef3bedd49c15e274c7317774fe0f2
3
  size 627
{checkpoint-19000 → checkpoint-23000}/trainer_state.json RENAMED
@@ -1,8 +1,8 @@
1
  {
2
- "best_metric": 0.6378119587898254,
3
- "best_model_checkpoint": "./qlora-out/checkpoint-19000",
4
- "epoch": 0.7084001342231834,
5
- "global_step": 19000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -1298,11 +1298,283 @@
1298
  "eval_samples_per_second": 0.422,
1299
  "eval_steps_per_second": 0.422,
1300
  "step": 19000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1301
  }
1302
  ],
1303
  "max_steps": 80463,
1304
  "num_train_epochs": 3,
1305
- "total_flos": 5.326473617405952e+18,
1306
  "trial_name": null,
1307
  "trial_params": null
1308
  }
 
1
  {
2
+ "best_metric": 0.6180054545402527,
3
+ "best_model_checkpoint": "./qlora-out/checkpoint-23000",
4
+ "epoch": 0.8575370045859587,
5
+ "global_step": 23000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
1298
  "eval_samples_per_second": 0.422,
1299
  "eval_steps_per_second": 0.422,
1300
  "step": 19000
1301
+ },
1302
+ {
1303
+ "epoch": 0.71,
1304
+ "learning_rate": 0.00017347885166694825,
1305
+ "loss": 0.6213,
1306
+ "step": 19100
1307
+ },
1308
+ {
1309
+ "epoch": 0.72,
1310
+ "learning_rate": 0.00017321342561755297,
1311
+ "loss": 0.6217,
1312
+ "step": 19200
1313
+ },
1314
+ {
1315
+ "epoch": 0.72,
1316
+ "learning_rate": 0.00017294688320515506,
1317
+ "loss": 0.6127,
1318
+ "step": 19300
1319
+ },
1320
+ {
1321
+ "epoch": 0.72,
1322
+ "learning_rate": 0.00017267922849401024,
1323
+ "loss": 0.6145,
1324
+ "step": 19400
1325
+ },
1326
+ {
1327
+ "epoch": 0.73,
1328
+ "learning_rate": 0.00017241046556533472,
1329
+ "loss": 0.5936,
1330
+ "step": 19500
1331
+ },
1332
+ {
1333
+ "epoch": 0.73,
1334
+ "learning_rate": 0.0001721405985172428,
1335
+ "loss": 0.6273,
1336
+ "step": 19600
1337
+ },
1338
+ {
1339
+ "epoch": 0.73,
1340
+ "learning_rate": 0.0001718696314646846,
1341
+ "loss": 0.6059,
1342
+ "step": 19700
1343
+ },
1344
+ {
1345
+ "epoch": 0.74,
1346
+ "learning_rate": 0.000171597568539383,
1347
+ "loss": 0.5934,
1348
+ "step": 19800
1349
+ },
1350
+ {
1351
+ "epoch": 0.74,
1352
+ "learning_rate": 0.000171324413889771,
1353
+ "loss": 0.6243,
1354
+ "step": 19900
1355
+ },
1356
+ {
1357
+ "epoch": 0.75,
1358
+ "learning_rate": 0.00017105017168092808,
1359
+ "loss": 0.6164,
1360
+ "step": 20000
1361
+ },
1362
+ {
1363
+ "epoch": 0.75,
1364
+ "eval_loss": 0.6324757933616638,
1365
+ "eval_runtime": 1266.6769,
1366
+ "eval_samples_per_second": 0.428,
1367
+ "eval_steps_per_second": 0.428,
1368
+ "step": 20000
1369
+ },
1370
+ {
1371
+ "epoch": 0.75,
1372
+ "learning_rate": 0.0001707748460945171,
1373
+ "loss": 0.5953,
1374
+ "step": 20100
1375
+ },
1376
+ {
1377
+ "epoch": 0.75,
1378
+ "learning_rate": 0.0001704984413287202,
1379
+ "loss": 0.6329,
1380
+ "step": 20200
1381
+ },
1382
+ {
1383
+ "epoch": 0.76,
1384
+ "learning_rate": 0.00017022096159817493,
1385
+ "loss": 0.6227,
1386
+ "step": 20300
1387
+ },
1388
+ {
1389
+ "epoch": 0.76,
1390
+ "learning_rate": 0.00016994241113391003,
1391
+ "loss": 0.6022,
1392
+ "step": 20400
1393
+ },
1394
+ {
1395
+ "epoch": 0.76,
1396
+ "learning_rate": 0.0001696627941832808,
1397
+ "loss": 0.604,
1398
+ "step": 20500
1399
+ },
1400
+ {
1401
+ "epoch": 0.77,
1402
+ "learning_rate": 0.0001693821150099044,
1403
+ "loss": 0.6101,
1404
+ "step": 20600
1405
+ },
1406
+ {
1407
+ "epoch": 0.77,
1408
+ "learning_rate": 0.00016910037789359485,
1409
+ "loss": 0.6242,
1410
+ "step": 20700
1411
+ },
1412
+ {
1413
+ "epoch": 0.78,
1414
+ "learning_rate": 0.00016881758713029776,
1415
+ "loss": 0.6096,
1416
+ "step": 20800
1417
+ },
1418
+ {
1419
+ "epoch": 0.78,
1420
+ "learning_rate": 0.0001685337470320248,
1421
+ "loss": 0.5948,
1422
+ "step": 20900
1423
+ },
1424
+ {
1425
+ "epoch": 0.78,
1426
+ "learning_rate": 0.0001682488619267879,
1427
+ "loss": 0.5911,
1428
+ "step": 21000
1429
+ },
1430
+ {
1431
+ "epoch": 0.78,
1432
+ "eval_loss": 0.6282580494880676,
1433
+ "eval_runtime": 1313.1215,
1434
+ "eval_samples_per_second": 0.413,
1435
+ "eval_steps_per_second": 0.413,
1436
+ "step": 21000
1437
+ },
1438
+ {
1439
+ "epoch": 0.79,
1440
+ "learning_rate": 0.0001679629361585335,
1441
+ "loss": 0.5716,
1442
+ "step": 21100
1443
+ },
1444
+ {
1445
+ "epoch": 0.79,
1446
+ "learning_rate": 0.00016767597408707594,
1447
+ "loss": 0.5957,
1448
+ "step": 21200
1449
+ },
1450
+ {
1451
+ "epoch": 0.79,
1452
+ "learning_rate": 0.00016738798008803128,
1453
+ "loss": 0.6308,
1454
+ "step": 21300
1455
+ },
1456
+ {
1457
+ "epoch": 0.8,
1458
+ "learning_rate": 0.00016709895855275048,
1459
+ "loss": 0.5891,
1460
+ "step": 21400
1461
+ },
1462
+ {
1463
+ "epoch": 0.8,
1464
+ "learning_rate": 0.00016680891388825243,
1465
+ "loss": 0.6104,
1466
+ "step": 21500
1467
+ },
1468
+ {
1469
+ "epoch": 0.81,
1470
+ "learning_rate": 0.00016651785051715674,
1471
+ "loss": 0.6344,
1472
+ "step": 21600
1473
+ },
1474
+ {
1475
+ "epoch": 0.81,
1476
+ "learning_rate": 0.0001662257728776163,
1477
+ "loss": 0.604,
1478
+ "step": 21700
1479
+ },
1480
+ {
1481
+ "epoch": 0.81,
1482
+ "learning_rate": 0.0001659326854232497,
1483
+ "loss": 0.6066,
1484
+ "step": 21800
1485
+ },
1486
+ {
1487
+ "epoch": 0.82,
1488
+ "learning_rate": 0.0001656385926230732,
1489
+ "loss": 0.6324,
1490
+ "step": 21900
1491
+ },
1492
+ {
1493
+ "epoch": 0.82,
1494
+ "learning_rate": 0.00016534349896143264,
1495
+ "loss": 0.5819,
1496
+ "step": 22000
1497
+ },
1498
+ {
1499
+ "epoch": 0.82,
1500
+ "eval_loss": 0.6218891143798828,
1501
+ "eval_runtime": 1296.6038,
1502
+ "eval_samples_per_second": 0.418,
1503
+ "eval_steps_per_second": 0.418,
1504
+ "step": 22000
1505
+ },
1506
+ {
1507
+ "epoch": 0.82,
1508
+ "learning_rate": 0.00016504740893793512,
1509
+ "loss": 0.6145,
1510
+ "step": 22100
1511
+ },
1512
+ {
1513
+ "epoch": 0.83,
1514
+ "learning_rate": 0.00016475032706738023,
1515
+ "loss": 0.6109,
1516
+ "step": 22200
1517
+ },
1518
+ {
1519
+ "epoch": 0.83,
1520
+ "learning_rate": 0.0001644522578796914,
1521
+ "loss": 0.608,
1522
+ "step": 22300
1523
+ },
1524
+ {
1525
+ "epoch": 0.84,
1526
+ "learning_rate": 0.0001641532059198466,
1527
+ "loss": 0.565,
1528
+ "step": 22400
1529
+ },
1530
+ {
1531
+ "epoch": 0.84,
1532
+ "learning_rate": 0.00016385317574780942,
1533
+ "loss": 0.6139,
1534
+ "step": 22500
1535
+ },
1536
+ {
1537
+ "epoch": 0.84,
1538
+ "learning_rate": 0.000163552171938459,
1539
+ "loss": 0.5888,
1540
+ "step": 22600
1541
+ },
1542
+ {
1543
+ "epoch": 0.85,
1544
+ "learning_rate": 0.00016325019908152078,
1545
+ "loss": 0.6065,
1546
+ "step": 22700
1547
+ },
1548
+ {
1549
+ "epoch": 0.85,
1550
+ "learning_rate": 0.0001629472617814962,
1551
+ "loss": 0.5959,
1552
+ "step": 22800
1553
+ },
1554
+ {
1555
+ "epoch": 0.85,
1556
+ "learning_rate": 0.00016264336465759258,
1557
+ "loss": 0.5918,
1558
+ "step": 22900
1559
+ },
1560
+ {
1561
+ "epoch": 0.86,
1562
+ "learning_rate": 0.0001623385123436528,
1563
+ "loss": 0.6083,
1564
+ "step": 23000
1565
+ },
1566
+ {
1567
+ "epoch": 0.86,
1568
+ "eval_loss": 0.6180054545402527,
1569
+ "eval_runtime": 1278.5639,
1570
+ "eval_samples_per_second": 0.424,
1571
+ "eval_steps_per_second": 0.424,
1572
+ "step": 23000
1573
  }
1574
  ],
1575
  "max_steps": 80463,
1576
  "num_train_epochs": 3,
1577
+ "total_flos": 6.446449236379976e+18,
1578
  "trial_name": null,
1579
  "trial_params": null
1580
  }
{checkpoint-19000 → checkpoint-23000}/training_args.bin RENAMED
File without changes