{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.17394329448599757, "global_step": 3000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 1.4999999999999999e-05, "loss": 5.797, "step": 10 }, { "epoch": 0.0, "learning_rate": 4.2e-05, "loss": 4.6838, "step": 20 }, { "epoch": 0.0, "learning_rate": 7.199999999999999e-05, "loss": 5.6319, "step": 30 }, { "epoch": 0.0, "learning_rate": 0.000102, "loss": 5.0822, "step": 40 }, { "epoch": 0.0, "learning_rate": 0.000129, "loss": 4.2645, "step": 50 }, { "epoch": 0.0, "learning_rate": 0.000159, "loss": 4.4867, "step": 60 }, { "epoch": 0.0, "learning_rate": 0.00018899999999999999, "loss": 4.321, "step": 70 }, { "epoch": 0.0, "learning_rate": 0.00021899999999999998, "loss": 3.9697, "step": 80 }, { "epoch": 0.01, "learning_rate": 0.000249, "loss": 3.8873, "step": 90 }, { "epoch": 0.01, "learning_rate": 0.000279, "loss": 3.9087, "step": 100 }, { "epoch": 0.01, "learning_rate": 0.0002999947786737831, "loss": 3.7777, "step": 110 }, { "epoch": 0.01, "learning_rate": 0.0002999773742530603, "loss": 4.0537, "step": 120 }, { "epoch": 0.01, "learning_rate": 0.00029995996983233736, "loss": 3.8181, "step": 130 }, { "epoch": 0.01, "learning_rate": 0.00029994256541161454, "loss": 3.7261, "step": 140 }, { "epoch": 0.01, "learning_rate": 0.00029992516099089166, "loss": 4.0132, "step": 150 }, { "epoch": 0.01, "learning_rate": 0.0002999077565701688, "loss": 3.7437, "step": 160 }, { "epoch": 0.01, "learning_rate": 0.0002998903521494459, "loss": 3.5736, "step": 170 }, { "epoch": 0.01, "learning_rate": 0.00029987294772872305, "loss": 3.8578, "step": 180 }, { "epoch": 0.01, "learning_rate": 0.00029985554330800023, "loss": 3.5236, "step": 190 }, { "epoch": 0.01, "learning_rate": 0.00029983813888727736, "loss": 3.8339, "step": 200 }, { "epoch": 0.01, "learning_rate": 0.0002998207344665545, "loss": 3.8672, "step": 210 }, { "epoch": 0.01, "learning_rate": 0.0002998033300458316, "loss": 3.4447, "step": 220 }, { "epoch": 0.01, "learning_rate": 0.00029978592562510874, "loss": 3.3767, "step": 230 }, { "epoch": 0.01, "learning_rate": 0.0002997685212043859, "loss": 3.8753, "step": 240 }, { "epoch": 0.01, "learning_rate": 0.000299751116783663, "loss": 3.825, "step": 250 }, { "epoch": 0.02, "learning_rate": 0.0002997337123629402, "loss": 3.7691, "step": 260 }, { "epoch": 0.02, "learning_rate": 0.0002997163079422173, "loss": 3.7529, "step": 270 }, { "epoch": 0.02, "learning_rate": 0.00029969890352149443, "loss": 3.7102, "step": 280 }, { "epoch": 0.02, "learning_rate": 0.00029968149910077155, "loss": 3.6104, "step": 290 }, { "epoch": 0.02, "learning_rate": 0.0002996640946800487, "loss": 3.7396, "step": 300 }, { "epoch": 0.02, "learning_rate": 0.00029964669025932586, "loss": 3.5363, "step": 310 }, { "epoch": 0.02, "learning_rate": 0.000299629285838603, "loss": 3.7725, "step": 320 }, { "epoch": 0.02, "learning_rate": 0.0002996118814178801, "loss": 3.728, "step": 330 }, { "epoch": 0.02, "learning_rate": 0.00029959447699715725, "loss": 3.333, "step": 340 }, { "epoch": 0.02, "learning_rate": 0.00029957707257643437, "loss": 3.7651, "step": 350 }, { "epoch": 0.02, "learning_rate": 0.00029955966815571155, "loss": 3.6743, "step": 360 }, { "epoch": 0.02, "learning_rate": 0.00029954226373498863, "loss": 3.6236, "step": 370 }, { "epoch": 0.02, "learning_rate": 0.0002995248593142658, "loss": 3.837, "step": 380 }, { "epoch": 0.02, "learning_rate": 0.00029950745489354294, "loss": 3.8097, "step": 390 }, { "epoch": 0.02, "learning_rate": 0.00029949005047282006, "loss": 3.886, "step": 400 }, { "epoch": 0.02, "learning_rate": 0.0002994726460520972, "loss": 3.7826, "step": 410 }, { "epoch": 0.02, "learning_rate": 0.0002994552416313743, "loss": 3.6854, "step": 420 }, { "epoch": 0.02, "learning_rate": 0.0002994378372106515, "loss": 3.4106, "step": 430 }, { "epoch": 0.03, "learning_rate": 0.00029942043278992857, "loss": 3.7991, "step": 440 }, { "epoch": 0.03, "learning_rate": 0.00029940302836920575, "loss": 3.5353, "step": 450 }, { "epoch": 0.03, "learning_rate": 0.0002993856239484829, "loss": 3.7661, "step": 460 }, { "epoch": 0.03, "learning_rate": 0.00029936821952776, "loss": 3.43, "step": 470 }, { "epoch": 0.03, "learning_rate": 0.00029935081510703714, "loss": 3.5965, "step": 480 }, { "epoch": 0.03, "learning_rate": 0.00029933341068631426, "loss": 3.6576, "step": 490 }, { "epoch": 0.03, "learning_rate": 0.00029931600626559144, "loss": 3.6634, "step": 500 }, { "epoch": 0.03, "learning_rate": 0.00029929860184486857, "loss": 3.5374, "step": 510 }, { "epoch": 0.03, "learning_rate": 0.0002992811974241457, "loss": 3.6912, "step": 520 }, { "epoch": 0.03, "learning_rate": 0.0002992637930034228, "loss": 3.6375, "step": 530 }, { "epoch": 0.03, "learning_rate": 0.00029924638858269995, "loss": 3.3367, "step": 540 }, { "epoch": 0.03, "learning_rate": 0.00029922898416197714, "loss": 3.7179, "step": 550 }, { "epoch": 0.03, "learning_rate": 0.00029921157974125426, "loss": 3.5059, "step": 560 }, { "epoch": 0.03, "learning_rate": 0.0002991941753205314, "loss": 3.2627, "step": 570 }, { "epoch": 0.03, "learning_rate": 0.0002991767708998085, "loss": 3.5826, "step": 580 }, { "epoch": 0.03, "learning_rate": 0.0002991593664790857, "loss": 3.5854, "step": 590 }, { "epoch": 0.03, "learning_rate": 0.00029914196205836277, "loss": 3.6205, "step": 600 }, { "epoch": 0.04, "learning_rate": 0.00029912455763763995, "loss": 3.3627, "step": 610 }, { "epoch": 0.04, "learning_rate": 0.0002991071532169171, "loss": 3.7299, "step": 620 }, { "epoch": 0.04, "learning_rate": 0.0002990897487961942, "loss": 3.4119, "step": 630 }, { "epoch": 0.04, "learning_rate": 0.00029907234437547133, "loss": 3.6321, "step": 640 }, { "epoch": 0.04, "learning_rate": 0.00029905493995474846, "loss": 3.4665, "step": 650 }, { "epoch": 0.04, "learning_rate": 0.00029903753553402564, "loss": 3.3959, "step": 660 }, { "epoch": 0.04, "learning_rate": 0.00029902013111330277, "loss": 3.4348, "step": 670 }, { "epoch": 0.04, "learning_rate": 0.0002990027266925799, "loss": 3.6478, "step": 680 }, { "epoch": 0.04, "learning_rate": 0.000298985322271857, "loss": 3.5589, "step": 690 }, { "epoch": 0.04, "learning_rate": 0.00029896791785113415, "loss": 3.4357, "step": 700 }, { "epoch": 0.04, "learning_rate": 0.00029895051343041133, "loss": 3.5833, "step": 710 }, { "epoch": 0.04, "learning_rate": 0.0002989331090096884, "loss": 3.4633, "step": 720 }, { "epoch": 0.04, "learning_rate": 0.0002989157045889656, "loss": 3.4848, "step": 730 }, { "epoch": 0.04, "learning_rate": 0.0002988983001682427, "loss": 3.1688, "step": 740 }, { "epoch": 0.04, "learning_rate": 0.00029888089574751984, "loss": 3.7193, "step": 750 }, { "epoch": 0.04, "learning_rate": 0.00029886349132679697, "loss": 3.5895, "step": 760 }, { "epoch": 0.04, "learning_rate": 0.0002988460869060741, "loss": 3.5518, "step": 770 }, { "epoch": 0.05, "learning_rate": 0.0002988286824853513, "loss": 3.5631, "step": 780 }, { "epoch": 0.05, "learning_rate": 0.0002988112780646284, "loss": 3.5545, "step": 790 }, { "epoch": 0.05, "learning_rate": 0.00029879387364390553, "loss": 3.5699, "step": 800 }, { "epoch": 0.05, "learning_rate": 0.00029877646922318266, "loss": 3.5634, "step": 810 }, { "epoch": 0.05, "learning_rate": 0.0002987590648024598, "loss": 3.607, "step": 820 }, { "epoch": 0.05, "learning_rate": 0.00029874166038173697, "loss": 3.3944, "step": 830 }, { "epoch": 0.05, "learning_rate": 0.00029872425596101404, "loss": 3.5578, "step": 840 }, { "epoch": 0.05, "learning_rate": 0.0002987068515402912, "loss": 3.3392, "step": 850 }, { "epoch": 0.05, "learning_rate": 0.00029868944711956835, "loss": 3.5491, "step": 860 }, { "epoch": 0.05, "learning_rate": 0.0002986720426988455, "loss": 3.4634, "step": 870 }, { "epoch": 0.05, "learning_rate": 0.0002986546382781226, "loss": 3.481, "step": 880 }, { "epoch": 0.05, "learning_rate": 0.00029863723385739973, "loss": 3.4969, "step": 890 }, { "epoch": 0.05, "learning_rate": 0.0002986198294366769, "loss": 3.4256, "step": 900 }, { "epoch": 0.05, "learning_rate": 0.00029860242501595404, "loss": 3.3778, "step": 910 }, { "epoch": 0.05, "learning_rate": 0.00029858502059523117, "loss": 3.291, "step": 920 }, { "epoch": 0.05, "learning_rate": 0.0002985676161745083, "loss": 3.5129, "step": 930 }, { "epoch": 0.05, "learning_rate": 0.0002985502117537854, "loss": 3.5895, "step": 940 }, { "epoch": 0.06, "learning_rate": 0.0002985328073330626, "loss": 3.4668, "step": 950 }, { "epoch": 0.06, "learning_rate": 0.0002985154029123397, "loss": 3.4919, "step": 960 }, { "epoch": 0.06, "learning_rate": 0.00029849799849161686, "loss": 3.5387, "step": 970 }, { "epoch": 0.06, "learning_rate": 0.000298480594070894, "loss": 3.5237, "step": 980 }, { "epoch": 0.06, "learning_rate": 0.0002984631896501711, "loss": 3.5108, "step": 990 }, { "epoch": 0.06, "learning_rate": 0.00029844578522944824, "loss": 3.6527, "step": 1000 }, { "epoch": 0.06, "learning_rate": 0.00029842838080872537, "loss": 3.2056, "step": 1010 }, { "epoch": 0.06, "learning_rate": 0.00029841097638800255, "loss": 3.4018, "step": 1020 }, { "epoch": 0.06, "learning_rate": 0.0002983935719672797, "loss": 3.4554, "step": 1030 }, { "epoch": 0.06, "learning_rate": 0.0002983761675465568, "loss": 3.4236, "step": 1040 }, { "epoch": 0.06, "learning_rate": 0.00029835876312583393, "loss": 3.3244, "step": 1050 }, { "epoch": 0.06, "learning_rate": 0.00029834135870511106, "loss": 3.4164, "step": 1060 }, { "epoch": 0.06, "learning_rate": 0.00029832395428438824, "loss": 3.0626, "step": 1070 }, { "epoch": 0.06, "learning_rate": 0.0002983065498636653, "loss": 3.7244, "step": 1080 }, { "epoch": 0.06, "learning_rate": 0.0002982891454429425, "loss": 3.4565, "step": 1090 }, { "epoch": 0.06, "learning_rate": 0.0002982717410222196, "loss": 3.5781, "step": 1100 }, { "epoch": 0.06, "learning_rate": 0.00029825433660149675, "loss": 3.2126, "step": 1110 }, { "epoch": 0.06, "learning_rate": 0.0002982369321807739, "loss": 3.6434, "step": 1120 }, { "epoch": 0.07, "learning_rate": 0.000298219527760051, "loss": 3.4129, "step": 1130 }, { "epoch": 0.07, "learning_rate": 0.0002982021233393282, "loss": 3.2887, "step": 1140 }, { "epoch": 0.07, "learning_rate": 0.0002981847189186053, "loss": 3.5407, "step": 1150 }, { "epoch": 0.07, "learning_rate": 0.00029816731449788244, "loss": 3.5717, "step": 1160 }, { "epoch": 0.07, "learning_rate": 0.00029814991007715957, "loss": 3.6888, "step": 1170 }, { "epoch": 0.07, "learning_rate": 0.0002981325056564367, "loss": 3.6167, "step": 1180 }, { "epoch": 0.07, "learning_rate": 0.0002981151012357139, "loss": 3.3514, "step": 1190 }, { "epoch": 0.07, "learning_rate": 0.00029809769681499095, "loss": 3.4163, "step": 1200 }, { "epoch": 0.07, "learning_rate": 0.00029808029239426813, "loss": 3.5967, "step": 1210 }, { "epoch": 0.07, "learning_rate": 0.00029806288797354526, "loss": 3.4587, "step": 1220 }, { "epoch": 0.07, "learning_rate": 0.0002980454835528224, "loss": 3.3907, "step": 1230 }, { "epoch": 0.07, "learning_rate": 0.0002980280791320995, "loss": 3.6969, "step": 1240 }, { "epoch": 0.07, "learning_rate": 0.00029801067471137664, "loss": 3.2609, "step": 1250 }, { "epoch": 0.07, "learning_rate": 0.0002979932702906538, "loss": 3.5595, "step": 1260 }, { "epoch": 0.07, "learning_rate": 0.00029797586586993095, "loss": 3.3332, "step": 1270 }, { "epoch": 0.07, "learning_rate": 0.0002979584614492081, "loss": 3.4112, "step": 1280 }, { "epoch": 0.07, "learning_rate": 0.0002979410570284852, "loss": 3.4097, "step": 1290 }, { "epoch": 0.08, "learning_rate": 0.00029792365260776233, "loss": 3.619, "step": 1300 }, { "epoch": 0.08, "learning_rate": 0.0002979062481870395, "loss": 3.2694, "step": 1310 }, { "epoch": 0.08, "learning_rate": 0.0002978888437663166, "loss": 3.6746, "step": 1320 }, { "epoch": 0.08, "learning_rate": 0.00029787143934559377, "loss": 3.3979, "step": 1330 }, { "epoch": 0.08, "learning_rate": 0.0002978540349248709, "loss": 3.5019, "step": 1340 }, { "epoch": 0.08, "learning_rate": 0.000297836630504148, "loss": 3.2628, "step": 1350 }, { "epoch": 0.08, "learning_rate": 0.00029781922608342515, "loss": 3.2965, "step": 1360 }, { "epoch": 0.08, "learning_rate": 0.0002978018216627023, "loss": 3.6118, "step": 1370 }, { "epoch": 0.08, "learning_rate": 0.00029778441724197946, "loss": 3.5439, "step": 1380 }, { "epoch": 0.08, "learning_rate": 0.00029776701282125653, "loss": 3.1755, "step": 1390 }, { "epoch": 0.08, "learning_rate": 0.0002977496084005337, "loss": 3.6912, "step": 1400 }, { "epoch": 0.08, "learning_rate": 0.00029773220397981084, "loss": 3.2572, "step": 1410 }, { "epoch": 0.08, "learning_rate": 0.00029771479955908797, "loss": 3.364, "step": 1420 }, { "epoch": 0.08, "learning_rate": 0.0002976973951383651, "loss": 3.4011, "step": 1430 }, { "epoch": 0.08, "learning_rate": 0.0002976799907176422, "loss": 3.5433, "step": 1440 }, { "epoch": 0.08, "learning_rate": 0.0002976625862969194, "loss": 3.4217, "step": 1450 }, { "epoch": 0.08, "learning_rate": 0.00029764518187619653, "loss": 3.331, "step": 1460 }, { "epoch": 0.09, "learning_rate": 0.00029762777745547366, "loss": 3.1582, "step": 1470 }, { "epoch": 0.09, "learning_rate": 0.0002976103730347508, "loss": 3.3783, "step": 1480 }, { "epoch": 0.09, "learning_rate": 0.0002975929686140279, "loss": 3.5096, "step": 1490 }, { "epoch": 0.09, "learning_rate": 0.0002975755641933051, "loss": 3.3047, "step": 1500 }, { "epoch": 0.09, "learning_rate": 0.00029755815977258217, "loss": 3.3239, "step": 1510 }, { "epoch": 0.09, "learning_rate": 0.00029754075535185935, "loss": 3.3897, "step": 1520 }, { "epoch": 0.09, "learning_rate": 0.0002975233509311365, "loss": 2.9298, "step": 1530 }, { "epoch": 0.09, "learning_rate": 0.0002975059465104136, "loss": 3.2706, "step": 1540 }, { "epoch": 0.09, "learning_rate": 0.00029748854208969073, "loss": 3.6979, "step": 1550 }, { "epoch": 0.09, "learning_rate": 0.00029747113766896786, "loss": 3.3929, "step": 1560 }, { "epoch": 0.09, "learning_rate": 0.00029745373324824504, "loss": 3.3763, "step": 1570 }, { "epoch": 0.09, "learning_rate": 0.00029743632882752217, "loss": 3.4914, "step": 1580 }, { "epoch": 0.09, "learning_rate": 0.0002974189244067993, "loss": 3.4507, "step": 1590 }, { "epoch": 0.09, "learning_rate": 0.0002974015199860764, "loss": 3.4506, "step": 1600 }, { "epoch": 0.09, "learning_rate": 0.00029738411556535355, "loss": 3.2699, "step": 1610 }, { "epoch": 0.09, "learning_rate": 0.00029736671114463073, "loss": 3.5554, "step": 1620 }, { "epoch": 0.09, "learning_rate": 0.00029734930672390786, "loss": 3.677, "step": 1630 }, { "epoch": 0.1, "learning_rate": 0.000297331902303185, "loss": 3.3466, "step": 1640 }, { "epoch": 0.1, "learning_rate": 0.0002973144978824621, "loss": 3.4125, "step": 1650 }, { "epoch": 0.1, "learning_rate": 0.00029729709346173924, "loss": 3.4123, "step": 1660 }, { "epoch": 0.1, "learning_rate": 0.00029727968904101637, "loss": 3.1869, "step": 1670 }, { "epoch": 0.1, "learning_rate": 0.00029726228462029355, "loss": 3.5066, "step": 1680 }, { "epoch": 0.1, "learning_rate": 0.0002972448801995707, "loss": 3.68, "step": 1690 }, { "epoch": 0.1, "learning_rate": 0.0002972274757788478, "loss": 3.3881, "step": 1700 }, { "epoch": 0.1, "learning_rate": 0.00029721007135812493, "loss": 3.5452, "step": 1710 }, { "epoch": 0.1, "learning_rate": 0.00029719266693740206, "loss": 3.4605, "step": 1720 }, { "epoch": 0.1, "learning_rate": 0.00029717526251667924, "loss": 3.5601, "step": 1730 }, { "epoch": 0.1, "learning_rate": 0.00029715785809595637, "loss": 3.3588, "step": 1740 }, { "epoch": 0.1, "learning_rate": 0.0002971404536752335, "loss": 3.3235, "step": 1750 }, { "epoch": 0.1, "learning_rate": 0.0002971230492545106, "loss": 3.6218, "step": 1760 }, { "epoch": 0.1, "learning_rate": 0.00029710564483378775, "loss": 3.4837, "step": 1770 }, { "epoch": 0.1, "learning_rate": 0.00029708824041306493, "loss": 3.402, "step": 1780 }, { "epoch": 0.1, "learning_rate": 0.000297070835992342, "loss": 3.2912, "step": 1790 }, { "epoch": 0.1, "learning_rate": 0.0002970534315716192, "loss": 3.704, "step": 1800 }, { "epoch": 0.1, "learning_rate": 0.0002970360271508963, "loss": 3.6123, "step": 1810 }, { "epoch": 0.11, "learning_rate": 0.00029701862273017344, "loss": 3.3626, "step": 1820 }, { "epoch": 0.11, "learning_rate": 0.00029700121830945057, "loss": 3.2765, "step": 1830 }, { "epoch": 0.11, "learning_rate": 0.0002969838138887277, "loss": 3.3083, "step": 1840 }, { "epoch": 0.11, "learning_rate": 0.0002969664094680049, "loss": 3.8005, "step": 1850 }, { "epoch": 0.11, "learning_rate": 0.000296949005047282, "loss": 3.6119, "step": 1860 }, { "epoch": 0.11, "learning_rate": 0.00029693160062655913, "loss": 3.2547, "step": 1870 }, { "epoch": 0.11, "learning_rate": 0.00029691419620583626, "loss": 3.4915, "step": 1880 }, { "epoch": 0.11, "learning_rate": 0.0002968967917851134, "loss": 3.4441, "step": 1890 }, { "epoch": 0.11, "learning_rate": 0.00029687938736439057, "loss": 3.6537, "step": 1900 }, { "epoch": 0.11, "learning_rate": 0.00029686198294366764, "loss": 3.2625, "step": 1910 }, { "epoch": 0.11, "learning_rate": 0.0002968445785229448, "loss": 3.411, "step": 1920 }, { "epoch": 0.11, "learning_rate": 0.00029682717410222195, "loss": 2.997, "step": 1930 }, { "epoch": 0.11, "learning_rate": 0.0002968097696814991, "loss": 3.3158, "step": 1940 }, { "epoch": 0.11, "learning_rate": 0.0002967923652607762, "loss": 3.0668, "step": 1950 }, { "epoch": 0.11, "learning_rate": 0.00029677496084005333, "loss": 3.3956, "step": 1960 }, { "epoch": 0.11, "learning_rate": 0.0002967575564193305, "loss": 3.5991, "step": 1970 }, { "epoch": 0.11, "learning_rate": 0.00029674015199860764, "loss": 3.5781, "step": 1980 }, { "epoch": 0.12, "learning_rate": 0.00029672274757788477, "loss": 3.5981, "step": 1990 }, { "epoch": 0.12, "learning_rate": 0.0002967053431571619, "loss": 3.6671, "step": 2000 }, { "epoch": 0.12, "learning_rate": 0.000296687938736439, "loss": 2.9026, "step": 2010 }, { "epoch": 0.12, "learning_rate": 0.0002966705343157162, "loss": 3.2852, "step": 2020 }, { "epoch": 0.12, "learning_rate": 0.0002966531298949933, "loss": 3.439, "step": 2030 }, { "epoch": 0.12, "learning_rate": 0.00029663572547427046, "loss": 3.3326, "step": 2040 }, { "epoch": 0.12, "learning_rate": 0.0002966183210535476, "loss": 3.3541, "step": 2050 }, { "epoch": 0.12, "learning_rate": 0.0002966009166328247, "loss": 3.3194, "step": 2060 }, { "epoch": 0.12, "learning_rate": 0.00029658351221210184, "loss": 3.5999, "step": 2070 }, { "epoch": 0.12, "learning_rate": 0.00029656610779137896, "loss": 3.1402, "step": 2080 }, { "epoch": 0.12, "learning_rate": 0.00029654870337065615, "loss": 3.0779, "step": 2090 }, { "epoch": 0.12, "learning_rate": 0.0002965312989499333, "loss": 3.4571, "step": 2100 }, { "epoch": 0.12, "learning_rate": 0.0002965138945292104, "loss": 3.2815, "step": 2110 }, { "epoch": 0.12, "learning_rate": 0.00029649649010848753, "loss": 3.2003, "step": 2120 }, { "epoch": 0.12, "learning_rate": 0.00029647908568776466, "loss": 3.5262, "step": 2130 }, { "epoch": 0.12, "learning_rate": 0.00029646168126704184, "loss": 3.1835, "step": 2140 }, { "epoch": 0.12, "learning_rate": 0.0002964442768463189, "loss": 3.0947, "step": 2150 }, { "epoch": 0.13, "learning_rate": 0.0002964268724255961, "loss": 3.5398, "step": 2160 }, { "epoch": 0.13, "learning_rate": 0.0002964094680048732, "loss": 3.319, "step": 2170 }, { "epoch": 0.13, "learning_rate": 0.00029639206358415035, "loss": 3.3722, "step": 2180 }, { "epoch": 0.13, "learning_rate": 0.0002963746591634275, "loss": 3.3396, "step": 2190 }, { "epoch": 0.13, "learning_rate": 0.0002963572547427046, "loss": 3.2734, "step": 2200 }, { "epoch": 0.13, "learning_rate": 0.0002963398503219818, "loss": 3.3672, "step": 2210 }, { "epoch": 0.13, "learning_rate": 0.0002963224459012589, "loss": 3.3559, "step": 2220 }, { "epoch": 0.13, "learning_rate": 0.00029630504148053604, "loss": 3.3601, "step": 2230 }, { "epoch": 0.13, "learning_rate": 0.00029628763705981316, "loss": 3.6175, "step": 2240 }, { "epoch": 0.13, "learning_rate": 0.0002962702326390903, "loss": 3.2681, "step": 2250 }, { "epoch": 0.13, "learning_rate": 0.00029625282821836747, "loss": 3.3181, "step": 2260 }, { "epoch": 0.13, "learning_rate": 0.00029623542379764455, "loss": 3.5367, "step": 2270 }, { "epoch": 0.13, "learning_rate": 0.00029621801937692173, "loss": 3.2207, "step": 2280 }, { "epoch": 0.13, "learning_rate": 0.00029620061495619885, "loss": 3.2358, "step": 2290 }, { "epoch": 0.13, "learning_rate": 0.000296183210535476, "loss": 3.4455, "step": 2300 }, { "epoch": 0.13, "learning_rate": 0.0002961658061147531, "loss": 3.4017, "step": 2310 }, { "epoch": 0.13, "learning_rate": 0.00029614840169403024, "loss": 3.4288, "step": 2320 }, { "epoch": 0.14, "learning_rate": 0.0002961309972733074, "loss": 3.3411, "step": 2330 }, { "epoch": 0.14, "learning_rate": 0.00029611359285258455, "loss": 3.1286, "step": 2340 }, { "epoch": 0.14, "learning_rate": 0.00029609618843186167, "loss": 2.9427, "step": 2350 }, { "epoch": 0.14, "learning_rate": 0.0002960787840111388, "loss": 3.4088, "step": 2360 }, { "epoch": 0.14, "learning_rate": 0.0002960613795904159, "loss": 3.3628, "step": 2370 }, { "epoch": 0.14, "learning_rate": 0.00029604397516969305, "loss": 3.4993, "step": 2380 }, { "epoch": 0.14, "learning_rate": 0.0002960265707489702, "loss": 3.6364, "step": 2390 }, { "epoch": 0.14, "learning_rate": 0.00029600916632824736, "loss": 3.482, "step": 2400 }, { "epoch": 0.14, "learning_rate": 0.0002959917619075245, "loss": 3.4856, "step": 2410 }, { "epoch": 0.14, "learning_rate": 0.0002959743574868016, "loss": 3.4327, "step": 2420 }, { "epoch": 0.14, "learning_rate": 0.00029595695306607874, "loss": 3.4458, "step": 2430 }, { "epoch": 0.14, "learning_rate": 0.00029593954864535587, "loss": 3.3191, "step": 2440 }, { "epoch": 0.14, "learning_rate": 0.00029592214422463305, "loss": 3.4966, "step": 2450 }, { "epoch": 0.14, "learning_rate": 0.0002959047398039101, "loss": 3.1979, "step": 2460 }, { "epoch": 0.14, "learning_rate": 0.0002958873353831873, "loss": 3.3932, "step": 2470 }, { "epoch": 0.14, "learning_rate": 0.00029586993096246444, "loss": 3.1029, "step": 2480 }, { "epoch": 0.14, "learning_rate": 0.00029585252654174156, "loss": 3.2549, "step": 2490 }, { "epoch": 0.14, "learning_rate": 0.0002958351221210187, "loss": 3.3986, "step": 2500 }, { "epoch": 0.15, "learning_rate": 0.0002958177177002958, "loss": 3.22, "step": 2510 }, { "epoch": 0.15, "learning_rate": 0.000295800313279573, "loss": 3.0488, "step": 2520 }, { "epoch": 0.15, "learning_rate": 0.0002957829088588501, "loss": 3.5085, "step": 2530 }, { "epoch": 0.15, "learning_rate": 0.00029576550443812725, "loss": 3.2679, "step": 2540 }, { "epoch": 0.15, "learning_rate": 0.0002957481000174044, "loss": 3.4828, "step": 2550 }, { "epoch": 0.15, "learning_rate": 0.0002957306955966815, "loss": 3.4081, "step": 2560 }, { "epoch": 0.15, "learning_rate": 0.0002957132911759587, "loss": 3.3543, "step": 2570 }, { "epoch": 0.15, "learning_rate": 0.00029569588675523576, "loss": 3.6927, "step": 2580 }, { "epoch": 0.15, "learning_rate": 0.00029567848233451294, "loss": 3.215, "step": 2590 }, { "epoch": 0.15, "learning_rate": 0.00029566107791379007, "loss": 3.502, "step": 2600 }, { "epoch": 0.15, "learning_rate": 0.0002956436734930672, "loss": 3.4641, "step": 2610 }, { "epoch": 0.15, "learning_rate": 0.0002956262690723443, "loss": 3.2631, "step": 2620 }, { "epoch": 0.15, "learning_rate": 0.00029560886465162145, "loss": 3.3747, "step": 2630 }, { "epoch": 0.15, "learning_rate": 0.00029559146023089863, "loss": 3.3669, "step": 2640 }, { "epoch": 0.15, "learning_rate": 0.00029557405581017576, "loss": 2.8722, "step": 2650 }, { "epoch": 0.15, "learning_rate": 0.0002955566513894529, "loss": 3.4885, "step": 2660 }, { "epoch": 0.15, "learning_rate": 0.00029553924696873, "loss": 3.5238, "step": 2670 }, { "epoch": 0.16, "learning_rate": 0.00029552184254800714, "loss": 3.2318, "step": 2680 }, { "epoch": 0.16, "learning_rate": 0.0002955044381272843, "loss": 3.4054, "step": 2690 }, { "epoch": 0.16, "learning_rate": 0.00029548703370656145, "loss": 3.2475, "step": 2700 }, { "epoch": 0.16, "learning_rate": 0.0002954696292858386, "loss": 3.3495, "step": 2710 }, { "epoch": 0.16, "learning_rate": 0.0002954522248651157, "loss": 3.5605, "step": 2720 }, { "epoch": 0.16, "learning_rate": 0.00029543482044439283, "loss": 3.0016, "step": 2730 }, { "epoch": 0.16, "learning_rate": 0.00029541741602366996, "loss": 3.1471, "step": 2740 }, { "epoch": 0.16, "learning_rate": 0.00029540001160294714, "loss": 3.5481, "step": 2750 }, { "epoch": 0.16, "learning_rate": 0.00029538260718222427, "loss": 3.4854, "step": 2760 }, { "epoch": 0.16, "learning_rate": 0.0002953652027615014, "loss": 3.535, "step": 2770 }, { "epoch": 0.16, "learning_rate": 0.0002953477983407785, "loss": 3.2613, "step": 2780 }, { "epoch": 0.16, "learning_rate": 0.00029533039392005565, "loss": 3.3677, "step": 2790 }, { "epoch": 0.16, "learning_rate": 0.00029531298949933283, "loss": 3.2767, "step": 2800 }, { "epoch": 0.16, "learning_rate": 0.00029529558507860996, "loss": 3.4752, "step": 2810 }, { "epoch": 0.16, "learning_rate": 0.0002952781806578871, "loss": 3.6749, "step": 2820 }, { "epoch": 0.16, "learning_rate": 0.0002952607762371642, "loss": 3.2192, "step": 2830 }, { "epoch": 0.16, "learning_rate": 0.00029524337181644134, "loss": 3.2643, "step": 2840 }, { "epoch": 0.17, "learning_rate": 0.0002952259673957185, "loss": 3.5323, "step": 2850 }, { "epoch": 0.17, "learning_rate": 0.0002952085629749956, "loss": 3.1769, "step": 2860 }, { "epoch": 0.17, "learning_rate": 0.0002951911585542728, "loss": 3.4551, "step": 2870 }, { "epoch": 0.17, "learning_rate": 0.0002951737541335499, "loss": 3.387, "step": 2880 }, { "epoch": 0.17, "learning_rate": 0.00029515634971282703, "loss": 3.2243, "step": 2890 }, { "epoch": 0.17, "learning_rate": 0.00029513894529210416, "loss": 2.9179, "step": 2900 }, { "epoch": 0.17, "learning_rate": 0.0002951215408713813, "loss": 3.2905, "step": 2910 }, { "epoch": 0.17, "learning_rate": 0.00029510413645065847, "loss": 3.262, "step": 2920 }, { "epoch": 0.17, "learning_rate": 0.0002950867320299356, "loss": 3.2404, "step": 2930 }, { "epoch": 0.17, "learning_rate": 0.0002950693276092127, "loss": 3.577, "step": 2940 }, { "epoch": 0.17, "learning_rate": 0.00029505192318848985, "loss": 3.5627, "step": 2950 }, { "epoch": 0.17, "learning_rate": 0.000295034518767767, "loss": 3.5328, "step": 2960 }, { "epoch": 0.17, "learning_rate": 0.00029501711434704416, "loss": 3.5149, "step": 2970 }, { "epoch": 0.17, "learning_rate": 0.00029499970992632123, "loss": 3.2421, "step": 2980 }, { "epoch": 0.17, "learning_rate": 0.0002949823055055984, "loss": 3.0618, "step": 2990 }, { "epoch": 0.17, "learning_rate": 0.00029496490108487554, "loss": 3.2825, "step": 3000 } ], "max_steps": 172470, "num_train_epochs": 10, "total_flos": 6.54510518697984e+16, "trial_name": null, "trial_params": null }