cwaud committed on
Commit c1a592d
1 Parent(s): e8e8d46

Training in progress, step 250, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:daa6a5dd07b72ef9430ace44e750fcdb4b7759bb0894a8b53a989328b0f60392
+oid sha256:ba114e5f887ed6bee28791b1206325155477c2e5560fb5a0fa2445b9a14ab670
 size 147770496
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ee8d3047990aa86dc0a36c1ef1cd2bb44cd433c10bb8c7e8d5f0f851a8fd47e6
+oid sha256:c6a6eae1f8a74cb4e2ecfce996fe78d7c29b0d277b81ad12a68deb43452a9e39
 size 295765866
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e26f5521fc1e6154bc9e3b1f3aa7674a376b8de111db48b8d3988bf77187f582
+oid sha256:4da830074bb329feb347ddaab079f26d40e80efce113e07a9d3f4f756cfd5bbb
 size 15024
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:fd15b1b91b4e9d24520dd50941a3a4e7796b5305fbac3c66fe134a0d0a7ca684
+oid sha256:efeca56ff129fd1938154764e5eedeecbd5777ae5c87bc82a516810d01093718
 size 15024
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f8ec05176df7a0c2aa9818822dcf5a3c91459baab3140e17c9bffba86c07f82f
+oid sha256:c859c36874bfac59ee5311bb46d04057167ce59e38d261dd16a448e0837fdf72
 size 15024
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f70ac3e835d56a709928da1914610ebaf09c78697ec6a3898b27108602ce80de
+oid sha256:ca7fcff078dbdb0c1c6efcaf72bc4a9140c10cda16434ba2611620408aff6078
 size 15024
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:78f280e0e658edbdc4fcfbaf05da6eb84d8d86c74ef9e6edc7763096efc3a439
+oid sha256:9e6892bef9374493ad62f6ec1fe71e66e2ecb6a2a8c48a6591b2eb5cb1debcb4
 size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": 1.1887668371200562,
   "best_model_checkpoint": "miner_id_24/checkpoint-200",
-  "epoch": 1.7218186709712133,
+  "epoch": 2.1522733387140165,
   "eval_steps": 25,
-  "global_step": 200,
+  "global_step": 250,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1479,6 +1479,372 @@
       "eval_samples_per_second": 39.253,
       "eval_steps_per_second": 10.206,
       "step": 200
+    },
+    {
+      "epoch": 1.7304277643260693,
+      "grad_norm": 0.7049666047096252,
+      "learning_rate": 6.854023410237687e-05,
+      "loss": 0.8616,
+      "step": 201
+    },
+    {
+      "epoch": 1.7390368576809254,
+      "grad_norm": 0.7968461513519287,
+      "learning_rate": 6.8234871662076e-05,
+      "loss": 1.0514,
+      "step": 202
+    },
+    {
+      "epoch": 1.7476459510357816,
+      "grad_norm": 1.1434416770935059,
+      "learning_rate": 6.792884061139813e-05,
+      "loss": 0.8036,
+      "step": 203
+    },
+    {
+      "epoch": 1.7562550443906377,
+      "grad_norm": 1.7658017873764038,
+      "learning_rate": 6.762215641067727e-05,
+      "loss": 1.4673,
+      "step": 204
+    },
+    {
+      "epoch": 1.7648641377454937,
+      "grad_norm": 1.5402103662490845,
+      "learning_rate": 6.731483455324374e-05,
+      "loss": 1.3059,
+      "step": 205
+    },
+    {
+      "epoch": 1.7734732311003496,
+      "grad_norm": 1.1886134147644043,
+      "learning_rate": 6.700689056464162e-05,
+      "loss": 1.1953,
+      "step": 206
+    },
+    {
+      "epoch": 1.7820823244552058,
+      "grad_norm": 0.7550622224807739,
+      "learning_rate": 6.66983400018443e-05,
+      "loss": 1.0902,
+      "step": 207
+    },
+    {
+      "epoch": 1.790691417810062,
+      "grad_norm": 0.6886234283447266,
+      "learning_rate": 6.638919845246859e-05,
+      "loss": 1.019,
+      "step": 208
+    },
+    {
+      "epoch": 1.799300511164918,
+      "grad_norm": 0.8987452387809753,
+      "learning_rate": 6.607948153398726e-05,
+      "loss": 0.9793,
+      "step": 209
+    },
+    {
+      "epoch": 1.807909604519774,
+      "grad_norm": 1.0460529327392578,
+      "learning_rate": 6.576920489294011e-05,
+      "loss": 0.9404,
+      "step": 210
+    },
+    {
+      "epoch": 1.81651869787463,
+      "grad_norm": 1.1776190996170044,
+      "learning_rate": 6.545838420414338e-05,
+      "loss": 0.8875,
+      "step": 211
+    },
+    {
+      "epoch": 1.825127791229486,
+      "grad_norm": 1.31589674949646,
+      "learning_rate": 6.514703516989805e-05,
+      "loss": 0.9621,
+      "step": 212
+    },
+    {
+      "epoch": 1.8337368845843423,
+      "grad_norm": 1.11146879196167,
+      "learning_rate": 6.483517351919646e-05,
+      "loss": 0.8761,
+      "step": 213
+    },
+    {
+      "epoch": 1.8423459779391984,
+      "grad_norm": 0.8180311918258667,
+      "learning_rate": 6.452281500692775e-05,
+      "loss": 0.9426,
+      "step": 214
+    },
+    {
+      "epoch": 1.8509550712940543,
+      "grad_norm": 0.7222698330879211,
+      "learning_rate": 6.420997541308195e-05,
+      "loss": 0.625,
+      "step": 215
+    },
+    {
+      "epoch": 1.8595641646489103,
+      "grad_norm": 1.14591646194458,
+      "learning_rate": 6.389667054195275e-05,
+      "loss": 1.1073,
+      "step": 216
+    },
+    {
+      "epoch": 1.8681732580037664,
+      "grad_norm": 1.1335718631744385,
+      "learning_rate": 6.358291622133912e-05,
+      "loss": 1.2874,
+      "step": 217
+    },
+    {
+      "epoch": 1.8767823513586226,
+      "grad_norm": 0.9186055064201355,
+      "learning_rate": 6.326872830174567e-05,
+      "loss": 1.1502,
+      "step": 218
+    },
+    {
+      "epoch": 1.8853914447134787,
+      "grad_norm": 0.942480206489563,
+      "learning_rate": 6.295412265558197e-05,
+      "loss": 1.1254,
+      "step": 219
+    },
+    {
+      "epoch": 1.8940005380683347,
+      "grad_norm": 0.8674805760383606,
+      "learning_rate": 6.263911517636063e-05,
+      "loss": 1.103,
+      "step": 220
+    },
+    {
+      "epoch": 1.9026096314231906,
+      "grad_norm": 0.8475675582885742,
+      "learning_rate": 6.232372177789439e-05,
+      "loss": 1.0278,
+      "step": 221
+    },
+    {
+      "epoch": 1.9112187247780468,
+      "grad_norm": 0.7307541966438293,
+      "learning_rate": 6.200795839349223e-05,
+      "loss": 0.9875,
+      "step": 222
+    },
+    {
+      "epoch": 1.919827818132903,
+      "grad_norm": 0.7373369336128235,
+      "learning_rate": 6.169184097515432e-05,
+      "loss": 1.0035,
+      "step": 223
+    },
+    {
+      "epoch": 1.928436911487759,
+      "grad_norm": 0.6376901865005493,
+      "learning_rate": 6.137538549276629e-05,
+      "loss": 0.7809,
+      "step": 224
+    },
+    {
+      "epoch": 1.937046004842615,
+      "grad_norm": 0.6437143087387085,
+      "learning_rate": 6.105860793329227e-05,
+      "loss": 0.8046,
+      "step": 225
+    },
+    {
+      "epoch": 1.937046004842615,
+      "eval_loss": 1.1572624444961548,
+      "eval_runtime": 1.2769,
+      "eval_samples_per_second": 39.158,
+      "eval_steps_per_second": 10.181,
+      "step": 225
+    },
+    {
+      "epoch": 1.945655098197471,
+      "grad_norm": 0.9029556512832642,
+      "learning_rate": 6.074152429996749e-05,
+      "loss": 0.9927,
+      "step": 226
+    },
+    {
+      "epoch": 1.9542641915523271,
+      "grad_norm": 0.8548585772514343,
+      "learning_rate": 6.042415061148954e-05,
+      "loss": 1.0187,
+      "step": 227
+    },
+    {
+      "epoch": 1.9628732849071833,
+      "grad_norm": 0.9854429960250854,
+      "learning_rate": 6.010650290120936e-05,
+      "loss": 0.8351,
+      "step": 228
+    },
+    {
+      "epoch": 1.9714823782620394,
+      "grad_norm": 1.5009618997573853,
+      "learning_rate": 5.978859721632112e-05,
+      "loss": 1.4645,
+      "step": 229
+    },
+    {
+      "epoch": 1.9800914716168954,
+      "grad_norm": 0.9327002763748169,
+      "learning_rate": 5.947044961705153e-05,
+      "loss": 1.0599,
+      "step": 230
+    },
+    {
+      "epoch": 1.9887005649717513,
+      "grad_norm": 0.6681526899337769,
+      "learning_rate": 5.9152076175848594e-05,
+      "loss": 0.9357,
+      "step": 231
+    },
+    {
+      "epoch": 1.9973096583266075,
+      "grad_norm": 0.8246638178825378,
+      "learning_rate": 5.883349297656956e-05,
+      "loss": 0.9124,
+      "step": 232
+    },
+    {
+      "epoch": 2.0059187516814636,
+      "grad_norm": 0.8793002963066101,
+      "learning_rate": 5.851471611366842e-05,
+      "loss": 1.1954,
+      "step": 233
+    },
+    {
+      "epoch": 2.0145278450363198,
+      "grad_norm": 0.7132886052131653,
+      "learning_rate": 5.81957616913828e-05,
+      "loss": 1.0198,
+      "step": 234
+    },
+    {
+      "epoch": 2.0231369383911755,
+      "grad_norm": 0.7113811373710632,
+      "learning_rate": 5.7876645822920464e-05,
+      "loss": 0.9339,
+      "step": 235
+    },
+    {
+      "epoch": 2.0317460317460316,
+      "grad_norm": 0.6471757888793945,
+      "learning_rate": 5.75573846296452e-05,
+      "loss": 0.9608,
+      "step": 236
+    },
+    {
+      "epoch": 2.040355125100888,
+      "grad_norm": 0.7370480298995972,
+      "learning_rate": 5.7237994240262515e-05,
+      "loss": 0.9259,
+      "step": 237
+    },
+    {
+      "epoch": 2.048964218455744,
+      "grad_norm": 0.7430047392845154,
+      "learning_rate": 5.691849079000465e-05,
+      "loss": 0.8468,
+      "step": 238
+    },
+    {
+      "epoch": 2.0575733118106,
+      "grad_norm": 0.6476728320121765,
+      "learning_rate": 5.659889041981564e-05,
+      "loss": 0.7466,
+      "step": 239
+    },
+    {
+      "epoch": 2.066182405165456,
+      "grad_norm": 1.0301920175552368,
+      "learning_rate": 5.627920927553575e-05,
+      "loss": 0.8599,
+      "step": 240
+    },
+    {
+      "epoch": 2.074791498520312,
+      "grad_norm": 0.6990707516670227,
+      "learning_rate": 5.5959463507085866e-05,
+      "loss": 0.6485,
+      "step": 241
+    },
+    {
+      "epoch": 2.083400591875168,
+      "grad_norm": 0.6740060448646545,
+      "learning_rate": 5.563966926765163e-05,
+      "loss": 0.6521,
+      "step": 242
+    },
+    {
+      "epoch": 2.0920096852300243,
+      "grad_norm": 0.7777407765388489,
+      "learning_rate": 5.53198427128674e-05,
+      "loss": 0.8288,
+      "step": 243
+    },
+    {
+      "epoch": 2.1006187785848804,
+      "grad_norm": 0.7086468935012817,
+      "learning_rate": 5.500000000000001e-05,
+      "loss": 0.6116,
+      "step": 244
+    },
+    {
+      "epoch": 2.109227871939736,
+      "grad_norm": 1.1495126485824585,
+      "learning_rate": 5.468015728713262e-05,
+      "loss": 1.2629,
+      "step": 245
+    },
+    {
+      "epoch": 2.1178369652945923,
+      "grad_norm": 1.1655594110488892,
+      "learning_rate": 5.4360330732348377e-05,
+      "loss": 1.1571,
+      "step": 246
+    },
+    {
+      "epoch": 2.1264460586494485,
+      "grad_norm": 1.0574018955230713,
+      "learning_rate": 5.404053649291415e-05,
+      "loss": 1.0127,
+      "step": 247
+    },
+    {
+      "epoch": 2.1350551520043046,
+      "grad_norm": 0.8099690675735474,
+      "learning_rate": 5.372079072446427e-05,
+      "loss": 0.9191,
+      "step": 248
+    },
+    {
+      "epoch": 2.143664245359161,
+      "grad_norm": 0.7748256325721741,
+      "learning_rate": 5.340110958018438e-05,
+      "loss": 0.86,
+      "step": 249
+    },
+    {
+      "epoch": 2.1522733387140165,
+      "grad_norm": 1.3335024118423462,
+      "learning_rate": 5.308150920999537e-05,
+      "loss": 0.9134,
+      "step": 250
+    },
+    {
+      "epoch": 2.1522733387140165,
+      "eval_loss": 1.1941081285476685,
+      "eval_runtime": 1.2717,
+      "eval_samples_per_second": 39.316,
+      "eval_steps_per_second": 10.222,
+      "step": 250
     }
   ],
   "logging_steps": 1,
@@ -1493,7 +1859,7 @@
         "early_stopping_threshold": 0.0
       },
       "attributes": {
-        "early_stopping_patience_counter": 0
+        "early_stopping_patience_counter": 1
      }
    },
    "TrainerControl": {
@@ -1502,12 +1868,12 @@
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
-        "should_training_stop": false
+        "should_training_stop": true
      },
      "attributes": {}
    }
  },
-  "total_flos": 4.248409672001782e+17,
+  "total_flos": 5.3114226274310554e+17,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null