ZeroUniqueness committed on
Commit
4b1e32c
•
1 Parent(s): 0454dd8

Training in progress, step 26000

Browse files
adapter_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e8c2bc1b0742a9846ba8422b26d8c01eefc4db7cfed8f3b0f57ff4cb0ca36737
3
  size 500897101
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:db604a690ace9df3e57323e7728a62d95ca6fe945a7a4f0390b60803c2b0d0f0
3
  size 500897101
{checkpoint-22000 → checkpoint-25000/adapter_model}/README.md RENAMED
File without changes
{checkpoint-22000 → checkpoint-25000/adapter_model}/adapter_config.json RENAMED
File without changes
{checkpoint-22000 → checkpoint-25000/adapter_model}/adapter_model.bin RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8d90531691a4bd22a7fc951651aaad0da90ed5d3154cec0eed731f4f8c1da5c1
3
  size 500897101
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e8c2bc1b0742a9846ba8422b26d8c01eefc4db7cfed8f3b0f57ff4cb0ca36737
3
  size 500897101
{checkpoint-22000/adapter_model → checkpoint-26000}/README.md RENAMED
File without changes
{checkpoint-22000/adapter_model → checkpoint-26000}/adapter_config.json RENAMED
File without changes
{checkpoint-22000/adapter_model → checkpoint-26000}/adapter_model.bin RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8d90531691a4bd22a7fc951651aaad0da90ed5d3154cec0eed731f4f8c1da5c1
3
  size 500897101
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:db604a690ace9df3e57323e7728a62d95ca6fe945a7a4f0390b60803c2b0d0f0
3
  size 500897101
{checkpoint-22000 → checkpoint-26000}/optimizer.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:32d1d8e706f824a607393e4770716b98e3e14380a36b3d7dd013d1899e28d004
3
  size 1001723453
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:27e0a8d1f02955048c81208b54b8e952c63d97fda72aa75ae96fcfc8f35af7cb
3
  size 1001723453
{checkpoint-22000 → checkpoint-26000}/rng_state.pth RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b542bc6d6985311dc1f25608b1f96d66e1395f22a48f78d5c5c683d2d8fdde5b
3
  size 14575
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:23c71a7c504ad8a21f65a57923dde0238456dda222b9af4b24d2bfc5f74e422c
3
  size 14575
{checkpoint-22000 → checkpoint-26000}/scheduler.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e1276f4059dba941483dcef8d87faad5b272a9ed0c06d8c247d04b1898961557
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8a9f0595f0fbccf1fcdc4c120788726227aa9e6442e4233fc3c71485b74b1e4e
3
  size 627
{checkpoint-22000 → checkpoint-26000}/trainer_state.json RENAMED
@@ -1,8 +1,8 @@
1
  {
2
- "best_metric": 0.6218891143798828,
3
- "best_model_checkpoint": "./qlora-out/checkpoint-22000",
4
- "epoch": 0.8202527869952649,
5
- "global_step": 22000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -1502,11 +1502,283 @@
1502
  "eval_samples_per_second": 0.418,
1503
  "eval_steps_per_second": 0.418,
1504
  "step": 22000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1505
  }
1506
  ],
1507
  "max_steps": 80463,
1508
  "num_train_epochs": 3,
1509
- "total_flos": 6.16734700814082e+18,
1510
  "trial_name": null,
1511
  "trial_params": null
1512
  }
 
1
  {
2
+ "best_metric": 0.6026987433433533,
3
+ "best_model_checkpoint": "./qlora-out/checkpoint-26000",
4
+ "epoch": 0.9693896573580404,
5
+ "global_step": 26000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
1502
  "eval_samples_per_second": 0.418,
1503
  "eval_steps_per_second": 0.418,
1504
  "step": 22000
1505
+ },
1506
+ {
1507
+ "epoch": 0.82,
1508
+ "learning_rate": 0.00016504740893793512,
1509
+ "loss": 0.6145,
1510
+ "step": 22100
1511
+ },
1512
+ {
1513
+ "epoch": 0.83,
1514
+ "learning_rate": 0.00016475032706738023,
1515
+ "loss": 0.6109,
1516
+ "step": 22200
1517
+ },
1518
+ {
1519
+ "epoch": 0.83,
1520
+ "learning_rate": 0.0001644522578796914,
1521
+ "loss": 0.608,
1522
+ "step": 22300
1523
+ },
1524
+ {
1525
+ "epoch": 0.84,
1526
+ "learning_rate": 0.0001641532059198466,
1527
+ "loss": 0.565,
1528
+ "step": 22400
1529
+ },
1530
+ {
1531
+ "epoch": 0.84,
1532
+ "learning_rate": 0.00016385317574780942,
1533
+ "loss": 0.6139,
1534
+ "step": 22500
1535
+ },
1536
+ {
1537
+ "epoch": 0.84,
1538
+ "learning_rate": 0.000163552171938459,
1539
+ "loss": 0.5888,
1540
+ "step": 22600
1541
+ },
1542
+ {
1543
+ "epoch": 0.85,
1544
+ "learning_rate": 0.00016325019908152078,
1545
+ "loss": 0.6065,
1546
+ "step": 22700
1547
+ },
1548
+ {
1549
+ "epoch": 0.85,
1550
+ "learning_rate": 0.0001629472617814962,
1551
+ "loss": 0.5959,
1552
+ "step": 22800
1553
+ },
1554
+ {
1555
+ "epoch": 0.85,
1556
+ "learning_rate": 0.00016264336465759258,
1557
+ "loss": 0.5918,
1558
+ "step": 22900
1559
+ },
1560
+ {
1561
+ "epoch": 0.86,
1562
+ "learning_rate": 0.0001623385123436528,
1563
+ "loss": 0.6083,
1564
+ "step": 23000
1565
+ },
1566
+ {
1567
+ "epoch": 0.86,
1568
+ "eval_loss": 0.6180054545402527,
1569
+ "eval_runtime": 1278.5639,
1570
+ "eval_samples_per_second": 0.424,
1571
+ "eval_steps_per_second": 0.424,
1572
+ "step": 23000
1573
+ },
1574
+ {
1575
+ "epoch": 0.86,
1576
+ "learning_rate": 0.0001620327094880844,
1577
+ "loss": 0.5795,
1578
+ "step": 23100
1579
+ },
1580
+ {
1581
+ "epoch": 0.86,
1582
+ "learning_rate": 0.00016172596075378893,
1583
+ "loss": 0.6025,
1584
+ "step": 23200
1585
+ },
1586
+ {
1587
+ "epoch": 0.87,
1588
+ "learning_rate": 0.00016141827081809075,
1589
+ "loss": 0.5669,
1590
+ "step": 23300
1591
+ },
1592
+ {
1593
+ "epoch": 0.87,
1594
+ "learning_rate": 0.00016110964437266568,
1595
+ "loss": 0.6172,
1596
+ "step": 23400
1597
+ },
1598
+ {
1599
+ "epoch": 0.88,
1600
+ "learning_rate": 0.00016080008612346955,
1601
+ "loss": 0.5899,
1602
+ "step": 23500
1603
+ },
1604
+ {
1605
+ "epoch": 0.88,
1606
+ "learning_rate": 0.00016048960079066636,
1607
+ "loss": 0.5889,
1608
+ "step": 23600
1609
+ },
1610
+ {
1611
+ "epoch": 0.88,
1612
+ "learning_rate": 0.00016017819310855632,
1613
+ "loss": 0.5893,
1614
+ "step": 23700
1615
+ },
1616
+ {
1617
+ "epoch": 0.89,
1618
+ "learning_rate": 0.00015986586782550376,
1619
+ "loss": 0.6363,
1620
+ "step": 23800
1621
+ },
1622
+ {
1623
+ "epoch": 0.89,
1624
+ "learning_rate": 0.00015955262970386458,
1625
+ "loss": 0.5876,
1626
+ "step": 23900
1627
+ },
1628
+ {
1629
+ "epoch": 0.89,
1630
+ "learning_rate": 0.00015923848351991372,
1631
+ "loss": 0.5964,
1632
+ "step": 24000
1633
+ },
1634
+ {
1635
+ "epoch": 0.89,
1636
+ "eval_loss": 0.6122664213180542,
1637
+ "eval_runtime": 1255.2341,
1638
+ "eval_samples_per_second": 0.432,
1639
+ "eval_steps_per_second": 0.432,
1640
+ "step": 24000
1641
+ },
1642
+ {
1643
+ "epoch": 0.9,
1644
+ "learning_rate": 0.00015892343406377225,
1645
+ "loss": 0.5943,
1646
+ "step": 24100
1647
+ },
1648
+ {
1649
+ "epoch": 0.9,
1650
+ "learning_rate": 0.00015860748613933455,
1651
+ "loss": 0.6008,
1652
+ "step": 24200
1653
+ },
1654
+ {
1655
+ "epoch": 0.91,
1656
+ "learning_rate": 0.00015829064456419477,
1657
+ "loss": 0.6123,
1658
+ "step": 24300
1659
+ },
1660
+ {
1661
+ "epoch": 0.91,
1662
+ "learning_rate": 0.00015797291416957355,
1663
+ "loss": 0.5819,
1664
+ "step": 24400
1665
+ },
1666
+ {
1667
+ "epoch": 0.91,
1668
+ "learning_rate": 0.00015765429980024425,
1669
+ "loss": 0.5731,
1670
+ "step": 24500
1671
+ },
1672
+ {
1673
+ "epoch": 0.92,
1674
+ "learning_rate": 0.00015733480631445926,
1675
+ "loss": 0.593,
1676
+ "step": 24600
1677
+ },
1678
+ {
1679
+ "epoch": 0.92,
1680
+ "learning_rate": 0.00015701443858387562,
1681
+ "loss": 0.5764,
1682
+ "step": 24700
1683
+ },
1684
+ {
1685
+ "epoch": 0.92,
1686
+ "learning_rate": 0.00015669320149348104,
1687
+ "loss": 0.6037,
1688
+ "step": 24800
1689
+ },
1690
+ {
1691
+ "epoch": 0.93,
1692
+ "learning_rate": 0.0001563710999415193,
1693
+ "loss": 0.5958,
1694
+ "step": 24900
1695
+ },
1696
+ {
1697
+ "epoch": 0.93,
1698
+ "learning_rate": 0.00015604813883941535,
1699
+ "loss": 0.6186,
1700
+ "step": 25000
1701
+ },
1702
+ {
1703
+ "epoch": 0.93,
1704
+ "eval_loss": 0.6086174249649048,
1705
+ "eval_runtime": 1260.3923,
1706
+ "eval_samples_per_second": 0.43,
1707
+ "eval_steps_per_second": 0.43,
1708
+ "step": 25000
1709
+ },
1710
+ {
1711
+ "epoch": 0.94,
1712
+ "learning_rate": 0.00015572432311170096,
1713
+ "loss": 0.597,
1714
+ "step": 25100
1715
+ },
1716
+ {
1717
+ "epoch": 0.94,
1718
+ "learning_rate": 0.00015539965769593894,
1719
+ "loss": 0.5657,
1720
+ "step": 25200
1721
+ },
1722
+ {
1723
+ "epoch": 0.94,
1724
+ "learning_rate": 0.0001550741475426484,
1725
+ "loss": 0.6081,
1726
+ "step": 25300
1727
+ },
1728
+ {
1729
+ "epoch": 0.95,
1730
+ "learning_rate": 0.00015474779761522894,
1731
+ "loss": 0.5957,
1732
+ "step": 25400
1733
+ },
1734
+ {
1735
+ "epoch": 0.95,
1736
+ "learning_rate": 0.00015442061288988525,
1737
+ "loss": 0.6032,
1738
+ "step": 25500
1739
+ },
1740
+ {
1741
+ "epoch": 0.95,
1742
+ "learning_rate": 0.00015409259835555089,
1743
+ "loss": 0.5662,
1744
+ "step": 25600
1745
+ },
1746
+ {
1747
+ "epoch": 0.96,
1748
+ "learning_rate": 0.00015376375901381256,
1749
+ "loss": 0.5607,
1750
+ "step": 25700
1751
+ },
1752
+ {
1753
+ "epoch": 0.96,
1754
+ "learning_rate": 0.00015343409987883354,
1755
+ "loss": 0.5727,
1756
+ "step": 25800
1757
+ },
1758
+ {
1759
+ "epoch": 0.97,
1760
+ "learning_rate": 0.00015310362597727747,
1761
+ "loss": 0.5762,
1762
+ "step": 25900
1763
+ },
1764
+ {
1765
+ "epoch": 0.97,
1766
+ "learning_rate": 0.00015277234234823154,
1767
+ "loss": 0.5841,
1768
+ "step": 26000
1769
+ },
1770
+ {
1771
+ "epoch": 0.97,
1772
+ "eval_loss": 0.6026987433433533,
1773
+ "eval_runtime": 1292.1515,
1774
+ "eval_samples_per_second": 0.419,
1775
+ "eval_steps_per_second": 0.419,
1776
+ "step": 26000
1777
  }
1778
  ],
1779
  "max_steps": 80463,
1780
  "num_train_epochs": 3,
1781
+ "total_flos": 7.289703933515858e+18,
1782
  "trial_name": null,
1783
  "trial_params": null
1784
  }
{checkpoint-22000 → checkpoint-26000}/training_args.bin RENAMED
File without changes