ZeroUniqueness committed on
Commit
0454dd8
·
1 Parent(s): dab7aaa

Training in progress, step 25000

Browse files
adapter_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0e5f593687f6b7f35b8db5be08cd3d3eabc5b69e4a5f70590ddb55cde4f73c64
3
  size 500897101
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e8c2bc1b0742a9846ba8422b26d8c01eefc4db7cfed8f3b0f57ff4cb0ca36737
3
  size 500897101
{checkpoint-21000 → checkpoint-24000/adapter_model}/README.md RENAMED
File without changes
{checkpoint-21000 → checkpoint-24000/adapter_model}/adapter_config.json RENAMED
File without changes
{checkpoint-21000 → checkpoint-24000/adapter_model}/adapter_model.bin RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7f715fb1f731563489cd7a96170934d1a2704e0ffdab19dbb6afc7d46ea57e62
3
  size 500897101
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0e5f593687f6b7f35b8db5be08cd3d3eabc5b69e4a5f70590ddb55cde4f73c64
3
  size 500897101
{checkpoint-21000/adapter_model → checkpoint-25000}/README.md RENAMED
File without changes
{checkpoint-21000/adapter_model → checkpoint-25000}/adapter_config.json RENAMED
File without changes
{checkpoint-21000/adapter_model → checkpoint-25000}/adapter_model.bin RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7f715fb1f731563489cd7a96170934d1a2704e0ffdab19dbb6afc7d46ea57e62
3
  size 500897101
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e8c2bc1b0742a9846ba8422b26d8c01eefc4db7cfed8f3b0f57ff4cb0ca36737
3
  size 500897101
{checkpoint-21000 → checkpoint-25000}/optimizer.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f59cb2f5454ff646db4048b5d9041957f0a425a223e652936fd2fb901f5dbb3c
3
  size 1001723453
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e74c891d56897c0b02740eeeace0d9ae8070e3f14d7a6244df64ddb88ba79e38
3
  size 1001723453
{checkpoint-21000 → checkpoint-25000}/rng_state.pth RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6951e71ceade65d40e7ed9c179d7e73344510ae9dc7634046a8ee3de8abe7606
3
  size 14575
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:629e0e9594e58952b7c72ab80b4684281712821515b182bc6bccbc5f679fe7a2
3
  size 14575
{checkpoint-21000 → checkpoint-25000}/scheduler.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:699754ccbfc6f07bc2342f0db6f56a69783000f811bfcd3674285767b1132db0
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:88e0bf9cf69ff1eacc409b75f32fab7577ca257cb9a30972758ae80dacd7ec2e
3
  size 627
{checkpoint-21000 → checkpoint-25000}/trainer_state.json RENAMED
@@ -1,8 +1,8 @@
1
  {
2
- "best_metric": 0.6282580494880676,
3
- "best_model_checkpoint": "./qlora-out/checkpoint-21000",
4
- "epoch": 0.782968569404571,
5
- "global_step": 21000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -1434,11 +1434,283 @@
1434
  "eval_samples_per_second": 0.413,
1435
  "eval_steps_per_second": 0.413,
1436
  "step": 21000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1437
  }
1438
  ],
1439
  "max_steps": 80463,
1440
  "num_train_epochs": 3,
1441
- "total_flos": 5.888261973449687e+18,
1442
  "trial_name": null,
1443
  "trial_params": null
1444
  }
 
1
  {
2
+ "best_metric": 0.6086174249649048,
3
+ "best_model_checkpoint": "./qlora-out/checkpoint-25000",
4
+ "epoch": 0.9321054397673465,
5
+ "global_step": 25000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
1434
  "eval_samples_per_second": 0.413,
1435
  "eval_steps_per_second": 0.413,
1436
  "step": 21000
1437
+ },
1438
+ {
1439
+ "epoch": 0.79,
1440
+ "learning_rate": 0.0001679629361585335,
1441
+ "loss": 0.5716,
1442
+ "step": 21100
1443
+ },
1444
+ {
1445
+ "epoch": 0.79,
1446
+ "learning_rate": 0.00016767597408707594,
1447
+ "loss": 0.5957,
1448
+ "step": 21200
1449
+ },
1450
+ {
1451
+ "epoch": 0.79,
1452
+ "learning_rate": 0.00016738798008803128,
1453
+ "loss": 0.6308,
1454
+ "step": 21300
1455
+ },
1456
+ {
1457
+ "epoch": 0.8,
1458
+ "learning_rate": 0.00016709895855275048,
1459
+ "loss": 0.5891,
1460
+ "step": 21400
1461
+ },
1462
+ {
1463
+ "epoch": 0.8,
1464
+ "learning_rate": 0.00016680891388825243,
1465
+ "loss": 0.6104,
1466
+ "step": 21500
1467
+ },
1468
+ {
1469
+ "epoch": 0.81,
1470
+ "learning_rate": 0.00016651785051715674,
1471
+ "loss": 0.6344,
1472
+ "step": 21600
1473
+ },
1474
+ {
1475
+ "epoch": 0.81,
1476
+ "learning_rate": 0.0001662257728776163,
1477
+ "loss": 0.604,
1478
+ "step": 21700
1479
+ },
1480
+ {
1481
+ "epoch": 0.81,
1482
+ "learning_rate": 0.0001659326854232497,
1483
+ "loss": 0.6066,
1484
+ "step": 21800
1485
+ },
1486
+ {
1487
+ "epoch": 0.82,
1488
+ "learning_rate": 0.0001656385926230732,
1489
+ "loss": 0.6324,
1490
+ "step": 21900
1491
+ },
1492
+ {
1493
+ "epoch": 0.82,
1494
+ "learning_rate": 0.00016534349896143264,
1495
+ "loss": 0.5819,
1496
+ "step": 22000
1497
+ },
1498
+ {
1499
+ "epoch": 0.82,
1500
+ "eval_loss": 0.6218891143798828,
1501
+ "eval_runtime": 1296.6038,
1502
+ "eval_samples_per_second": 0.418,
1503
+ "eval_steps_per_second": 0.418,
1504
+ "step": 22000
1505
+ },
1506
+ {
1507
+ "epoch": 0.82,
1508
+ "learning_rate": 0.00016504740893793512,
1509
+ "loss": 0.6145,
1510
+ "step": 22100
1511
+ },
1512
+ {
1513
+ "epoch": 0.83,
1514
+ "learning_rate": 0.00016475032706738023,
1515
+ "loss": 0.6109,
1516
+ "step": 22200
1517
+ },
1518
+ {
1519
+ "epoch": 0.83,
1520
+ "learning_rate": 0.0001644522578796914,
1521
+ "loss": 0.608,
1522
+ "step": 22300
1523
+ },
1524
+ {
1525
+ "epoch": 0.84,
1526
+ "learning_rate": 0.0001641532059198466,
1527
+ "loss": 0.565,
1528
+ "step": 22400
1529
+ },
1530
+ {
1531
+ "epoch": 0.84,
1532
+ "learning_rate": 0.00016385317574780942,
1533
+ "loss": 0.6139,
1534
+ "step": 22500
1535
+ },
1536
+ {
1537
+ "epoch": 0.84,
1538
+ "learning_rate": 0.000163552171938459,
1539
+ "loss": 0.5888,
1540
+ "step": 22600
1541
+ },
1542
+ {
1543
+ "epoch": 0.85,
1544
+ "learning_rate": 0.00016325019908152078,
1545
+ "loss": 0.6065,
1546
+ "step": 22700
1547
+ },
1548
+ {
1549
+ "epoch": 0.85,
1550
+ "learning_rate": 0.0001629472617814962,
1551
+ "loss": 0.5959,
1552
+ "step": 22800
1553
+ },
1554
+ {
1555
+ "epoch": 0.85,
1556
+ "learning_rate": 0.00016264336465759258,
1557
+ "loss": 0.5918,
1558
+ "step": 22900
1559
+ },
1560
+ {
1561
+ "epoch": 0.86,
1562
+ "learning_rate": 0.0001623385123436528,
1563
+ "loss": 0.6083,
1564
+ "step": 23000
1565
+ },
1566
+ {
1567
+ "epoch": 0.86,
1568
+ "eval_loss": 0.6180054545402527,
1569
+ "eval_runtime": 1278.5639,
1570
+ "eval_samples_per_second": 0.424,
1571
+ "eval_steps_per_second": 0.424,
1572
+ "step": 23000
1573
+ },
1574
+ {
1575
+ "epoch": 0.86,
1576
+ "learning_rate": 0.0001620327094880844,
1577
+ "loss": 0.5795,
1578
+ "step": 23100
1579
+ },
1580
+ {
1581
+ "epoch": 0.86,
1582
+ "learning_rate": 0.00016172596075378893,
1583
+ "loss": 0.6025,
1584
+ "step": 23200
1585
+ },
1586
+ {
1587
+ "epoch": 0.87,
1588
+ "learning_rate": 0.00016141827081809075,
1589
+ "loss": 0.5669,
1590
+ "step": 23300
1591
+ },
1592
+ {
1593
+ "epoch": 0.87,
1594
+ "learning_rate": 0.00016110964437266568,
1595
+ "loss": 0.6172,
1596
+ "step": 23400
1597
+ },
1598
+ {
1599
+ "epoch": 0.88,
1600
+ "learning_rate": 0.00016080008612346955,
1601
+ "loss": 0.5899,
1602
+ "step": 23500
1603
+ },
1604
+ {
1605
+ "epoch": 0.88,
1606
+ "learning_rate": 0.00016048960079066636,
1607
+ "loss": 0.5889,
1608
+ "step": 23600
1609
+ },
1610
+ {
1611
+ "epoch": 0.88,
1612
+ "learning_rate": 0.00016017819310855632,
1613
+ "loss": 0.5893,
1614
+ "step": 23700
1615
+ },
1616
+ {
1617
+ "epoch": 0.89,
1618
+ "learning_rate": 0.00015986586782550376,
1619
+ "loss": 0.6363,
1620
+ "step": 23800
1621
+ },
1622
+ {
1623
+ "epoch": 0.89,
1624
+ "learning_rate": 0.00015955262970386458,
1625
+ "loss": 0.5876,
1626
+ "step": 23900
1627
+ },
1628
+ {
1629
+ "epoch": 0.89,
1630
+ "learning_rate": 0.00015923848351991372,
1631
+ "loss": 0.5964,
1632
+ "step": 24000
1633
+ },
1634
+ {
1635
+ "epoch": 0.89,
1636
+ "eval_loss": 0.6122664213180542,
1637
+ "eval_runtime": 1255.2341,
1638
+ "eval_samples_per_second": 0.432,
1639
+ "eval_steps_per_second": 0.432,
1640
+ "step": 24000
1641
+ },
1642
+ {
1643
+ "epoch": 0.9,
1644
+ "learning_rate": 0.00015892343406377225,
1645
+ "loss": 0.5943,
1646
+ "step": 24100
1647
+ },
1648
+ {
1649
+ "epoch": 0.9,
1650
+ "learning_rate": 0.00015860748613933455,
1651
+ "loss": 0.6008,
1652
+ "step": 24200
1653
+ },
1654
+ {
1655
+ "epoch": 0.91,
1656
+ "learning_rate": 0.00015829064456419477,
1657
+ "loss": 0.6123,
1658
+ "step": 24300
1659
+ },
1660
+ {
1661
+ "epoch": 0.91,
1662
+ "learning_rate": 0.00015797291416957355,
1663
+ "loss": 0.5819,
1664
+ "step": 24400
1665
+ },
1666
+ {
1667
+ "epoch": 0.91,
1668
+ "learning_rate": 0.00015765429980024425,
1669
+ "loss": 0.5731,
1670
+ "step": 24500
1671
+ },
1672
+ {
1673
+ "epoch": 0.92,
1674
+ "learning_rate": 0.00015733480631445926,
1675
+ "loss": 0.593,
1676
+ "step": 24600
1677
+ },
1678
+ {
1679
+ "epoch": 0.92,
1680
+ "learning_rate": 0.00015701443858387562,
1681
+ "loss": 0.5764,
1682
+ "step": 24700
1683
+ },
1684
+ {
1685
+ "epoch": 0.92,
1686
+ "learning_rate": 0.00015669320149348104,
1687
+ "loss": 0.6037,
1688
+ "step": 24800
1689
+ },
1690
+ {
1691
+ "epoch": 0.93,
1692
+ "learning_rate": 0.0001563710999415193,
1693
+ "loss": 0.5958,
1694
+ "step": 24900
1695
+ },
1696
+ {
1697
+ "epoch": 0.93,
1698
+ "learning_rate": 0.00015604813883941535,
1699
+ "loss": 0.6186,
1700
+ "step": 25000
1701
+ },
1702
+ {
1703
+ "epoch": 0.93,
1704
+ "eval_loss": 0.6086174249649048,
1705
+ "eval_runtime": 1260.3923,
1706
+ "eval_samples_per_second": 0.43,
1707
+ "eval_steps_per_second": 0.43,
1708
+ "step": 25000
1709
  }
1710
  ],
1711
  "max_steps": 80463,
1712
  "num_train_epochs": 3,
1713
+ "total_flos": 7.008735568518267e+18,
1714
  "trial_name": null,
1715
  "trial_params": null
1716
  }
{checkpoint-21000 → checkpoint-25000}/training_args.bin RENAMED
File without changes