{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 1089, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 1.8348623853211011e-06, "loss": 1.1015, "step": 1 }, { "epoch": 0.0, "learning_rate": 9.174311926605506e-06, "loss": 1.1129, "step": 5 }, { "epoch": 0.01, "learning_rate": 1.834862385321101e-05, "loss": 1.1292, "step": 10 }, { "epoch": 0.01, "learning_rate": 2.7522935779816515e-05, "loss": 1.1016, "step": 15 }, { "epoch": 0.02, "learning_rate": 3.669724770642202e-05, "loss": 1.0543, "step": 20 }, { "epoch": 0.02, "learning_rate": 4.587155963302753e-05, "loss": 1.0457, "step": 25 }, { "epoch": 0.03, "learning_rate": 5.504587155963303e-05, "loss": 1.0129, "step": 30 }, { "epoch": 0.03, "learning_rate": 6.422018348623854e-05, "loss": 1.0241, "step": 35 }, { "epoch": 0.04, "learning_rate": 7.339449541284404e-05, "loss": 0.9982, "step": 40 }, { "epoch": 0.04, "learning_rate": 8.256880733944955e-05, "loss": 0.9938, "step": 45 }, { "epoch": 0.05, "learning_rate": 9.174311926605506e-05, "loss": 0.9913, "step": 50 }, { "epoch": 0.05, "learning_rate": 0.00010091743119266055, "loss": 1.0032, "step": 55 }, { "epoch": 0.06, "learning_rate": 0.00011009174311926606, "loss": 0.9909, "step": 60 }, { "epoch": 0.06, "learning_rate": 0.00011926605504587157, "loss": 0.9596, "step": 65 }, { "epoch": 0.06, "learning_rate": 0.00012844036697247707, "loss": 0.9969, "step": 70 }, { "epoch": 0.07, "learning_rate": 0.00013761467889908258, "loss": 0.9935, "step": 75 }, { "epoch": 0.07, "learning_rate": 0.0001467889908256881, "loss": 0.9994, "step": 80 }, { "epoch": 0.08, "learning_rate": 0.0001559633027522936, "loss": 0.9852, "step": 85 }, { "epoch": 0.08, "learning_rate": 0.0001651376146788991, "loss": 0.9935, "step": 90 }, { "epoch": 0.09, "learning_rate": 0.00017431192660550458, "loss": 0.9803, "step": 95 }, { "epoch": 0.09, "learning_rate": 0.00018348623853211012, "loss": 0.9818, "step": 100 }, { "epoch": 0.1, "learning_rate": 0.0001926605504587156, "loss": 0.9631, "step": 105 }, { "epoch": 0.1, "learning_rate": 0.0001999994861726391, "loss": 0.9779, "step": 110 }, { "epoch": 0.11, "learning_rate": 0.00019998150276943902, "loss": 0.9627, "step": 115 }, { "epoch": 0.11, "learning_rate": 0.0001999378332783191, "loss": 0.9877, "step": 120 }, { "epoch": 0.11, "learning_rate": 0.00019986848891833845, "loss": 0.9804, "step": 125 }, { "epoch": 0.12, "learning_rate": 0.0001997734875046456, "loss": 0.9818, "step": 130 }, { "epoch": 0.12, "learning_rate": 0.00019965285344390184, "loss": 0.96, "step": 135 }, { "epoch": 0.13, "learning_rate": 0.00019950661772801063, "loss": 0.9643, "step": 140 }, { "epoch": 0.13, "learning_rate": 0.00019933481792615583, "loss": 0.9793, "step": 145 }, { "epoch": 0.14, "learning_rate": 0.00019913749817514963, "loss": 0.9812, "step": 150 }, { "epoch": 0.14, "learning_rate": 0.00019891470916809362, "loss": 0.972, "step": 155 }, { "epoch": 0.15, "learning_rate": 0.00019866650814135518, "loss": 0.9997, "step": 160 }, { "epoch": 0.15, "learning_rate": 0.00019839295885986296, "loss": 0.9662, "step": 165 }, { "epoch": 0.16, "learning_rate": 0.00019809413160072528, "loss": 0.9624, "step": 170 }, { "epoch": 0.16, "learning_rate": 0.00019777010313517518, "loss": 0.9753, "step": 175 }, { "epoch": 0.17, "learning_rate": 0.00019742095670884728, "loss": 0.9703, "step": 180 }, { "epoch": 0.17, "learning_rate": 0.0001970467820203915, "loss": 0.9556, "step": 185 }, { "epoch": 0.17, "learning_rate": 0.0001966476751984283, "loss": 0.9778, "step": 190 }, { "epoch": 0.18, "learning_rate": 0.0001962237387768529, "loss": 0.9602, "step": 195 }, { "epoch": 0.18, "learning_rate": 0.00019577508166849304, "loss": 0.9521, "step": 200 }, { "epoch": 0.19, "learning_rate": 0.00019530181913712872, "loss": 0.9537, "step": 205 }, { "epoch": 0.19, "learning_rate": 0.00019480407276787967, "loss": 0.9741, "step": 210 }, { "epoch": 0.2, "learning_rate": 0.0001942819704359693, "loss": 0.9739, "step": 215 }, { "epoch": 0.2, "learning_rate": 0.00019373564627387242, "loss": 0.9578, "step": 220 }, { "epoch": 0.21, "learning_rate": 0.0001931652406368554, "loss": 0.9472, "step": 225 }, { "epoch": 0.21, "learning_rate": 0.00019257090006691798, "loss": 0.9989, "step": 230 }, { "epoch": 0.22, "learning_rate": 0.0001919527772551451, "loss": 0.9303, "step": 235 }, { "epoch": 0.22, "learning_rate": 0.00019131103100247934, "loss": 0.9676, "step": 240 }, { "epoch": 0.22, "learning_rate": 0.0001906458261789238, "loss": 0.9912, "step": 245 }, { "epoch": 0.23, "learning_rate": 0.00018995733368118556, "loss": 0.948, "step": 250 }, { "epoch": 0.23, "learning_rate": 0.0001892457303887706, "loss": 0.9664, "step": 255 }, { "epoch": 0.24, "learning_rate": 0.00018851119911854233, "loss": 0.9388, "step": 260 }, { "epoch": 0.24, "learning_rate": 0.00018775392857775432, "loss": 0.9668, "step": 265 }, { "epoch": 0.25, "learning_rate": 0.00018697411331556956, "loss": 0.9585, "step": 270 }, { "epoch": 0.25, "learning_rate": 0.0001861719536730795, "loss": 0.9585, "step": 275 }, { "epoch": 0.26, "learning_rate": 0.0001853476557318346, "loss": 0.9553, "step": 280 }, { "epoch": 0.26, "learning_rate": 0.00018450143126090015, "loss": 0.9551, "step": 285 }, { "epoch": 0.27, "learning_rate": 0.0001836334976624511, "loss": 0.9558, "step": 290 }, { "epoch": 0.27, "learning_rate": 0.00018274407791591966, "loss": 0.9784, "step": 295 }, { "epoch": 0.28, "learning_rate": 0.00018183340052070997, "loss": 0.9687, "step": 300 }, { "epoch": 0.28, "learning_rate": 0.00018090169943749476, "loss": 0.9392, "step": 305 }, { "epoch": 0.28, "learning_rate": 0.0001799492140281086, "loss": 0.9778, "step": 310 }, { "epoch": 0.29, "learning_rate": 0.00017897618899405423, "loss": 0.9659, "step": 315 }, { "epoch": 0.29, "learning_rate": 0.00017798287431363641, "loss": 0.9503, "step": 320 }, { "epoch": 0.3, "learning_rate": 0.00017696952517774062, "loss": 0.9471, "step": 325 }, { "epoch": 0.3, "learning_rate": 0.000175936401924272, "loss": 0.9616, "step": 330 }, { "epoch": 0.31, "learning_rate": 0.00017488376997127283, "loss": 0.9513, "step": 335 }, { "epoch": 0.31, "learning_rate": 0.00017381189974873407, "loss": 0.9547, "step": 340 }, { "epoch": 0.32, "learning_rate": 0.00017272106662911973, "loss": 0.9712, "step": 345 }, { "epoch": 0.32, "learning_rate": 0.00017161155085662145, "loss": 0.9634, "step": 350 }, { "epoch": 0.33, "learning_rate": 0.00017048363747516117, "loss": 0.9585, "step": 355 }, { "epoch": 0.33, "learning_rate": 0.0001693376162551613, "loss": 0.9364, "step": 360 }, { "epoch": 0.34, "learning_rate": 0.00016817378161909996, "loss": 0.9714, "step": 365 }, { "epoch": 0.34, "learning_rate": 0.00016699243256587153, "loss": 0.9671, "step": 370 }, { "epoch": 0.34, "learning_rate": 0.00016579387259397127, "loss": 0.9533, "step": 375 }, { "epoch": 0.35, "learning_rate": 0.00016457840962352403, "loss": 0.9627, "step": 380 }, { "epoch": 0.35, "learning_rate": 0.00016334635591717703, "loss": 0.9724, "step": 385 }, { "epoch": 0.36, "learning_rate": 0.00016209802799987673, "loss": 0.9431, "step": 390 }, { "epoch": 0.36, "learning_rate": 0.00016083374657755134, "loss": 0.9737, "step": 395 }, { "epoch": 0.37, "learning_rate": 0.00015955383645471828, "loss": 0.9397, "step": 400 }, { "epoch": 0.37, "learning_rate": 0.0001582586264510396, "loss": 0.9472, "step": 405 }, { "epoch": 0.38, "learning_rate": 0.0001569484493168452, "loss": 0.958, "step": 410 }, { "epoch": 0.38, "learning_rate": 0.0001556236416476465, "loss": 0.9466, "step": 415 }, { "epoch": 0.39, "learning_rate": 0.00015428454379766223, "loss": 0.9675, "step": 420 }, { "epoch": 0.39, "learning_rate": 0.00015293149979237876, "loss": 0.9765, "step": 425 }, { "epoch": 0.39, "learning_rate": 0.0001515648572401667, "loss": 0.9678, "step": 430 }, { "epoch": 0.4, "learning_rate": 0.00015018496724297778, "loss": 0.9222, "step": 435 }, { "epoch": 0.4, "learning_rate": 0.00014879218430614345, "loss": 0.9466, "step": 440 }, { "epoch": 0.41, "learning_rate": 0.00014738686624729986, "loss": 0.9662, "step": 445 }, { "epoch": 0.41, "learning_rate": 0.00014596937410446117, "loss": 0.9705, "step": 450 }, { "epoch": 0.42, "learning_rate": 0.0001445400720432659, "loss": 0.9508, "step": 455 }, { "epoch": 0.42, "learning_rate": 0.00014309932726342005, "loss": 0.9887, "step": 460 }, { "epoch": 0.43, "learning_rate": 0.0001416475099043599, "loss": 0.955, "step": 465 }, { "epoch": 0.43, "learning_rate": 0.00014018499295016056, "loss": 0.9807, "step": 470 }, { "epoch": 0.44, "learning_rate": 0.00013871215213371284, "loss": 0.9364, "step": 475 }, { "epoch": 0.44, "learning_rate": 0.00013722936584019453, "loss": 0.9526, "step": 480 }, { "epoch": 0.45, "learning_rate": 0.0001357370150098601, "loss": 0.9643, "step": 485 }, { "epoch": 0.45, "learning_rate": 0.0001342354830401738, "loss": 0.9652, "step": 490 }, { "epoch": 0.45, "learning_rate": 0.0001327251556873117, "loss": 0.9517, "step": 495 }, { "epoch": 0.46, "learning_rate": 0.00013120642096705774, "loss": 0.9607, "step": 500 }, { "epoch": 0.46, "learning_rate": 0.00012967966905511906, "loss": 0.9208, "step": 505 }, { "epoch": 0.47, "learning_rate": 0.00012814529218688686, "loss": 0.9575, "step": 510 }, { "epoch": 0.47, "learning_rate": 0.00012660368455666752, "loss": 0.9392, "step": 515 }, { "epoch": 0.48, "learning_rate": 0.00012505524221641096, "loss": 0.9342, "step": 520 }, { "epoch": 0.48, "learning_rate": 0.00012350036297396154, "loss": 0.9472, "step": 525 }, { "epoch": 0.49, "learning_rate": 0.00012193944629085778, "loss": 1.0109, "step": 530 }, { "epoch": 0.49, "learning_rate": 0.00012037289317970757, "loss": 0.9227, "step": 535 }, { "epoch": 0.5, "learning_rate": 0.00011880110610116437, "loss": 0.9347, "step": 540 }, { "epoch": 0.5, "learning_rate": 0.0001172244888605319, "loss": 0.9505, "step": 545 }, { "epoch": 0.51, "learning_rate": 0.0001156434465040231, "loss": 0.9306, "step": 550 }, { "epoch": 0.51, "learning_rate": 0.00011405838521470029, "loss": 0.9388, "step": 555 }, { "epoch": 0.51, "learning_rate": 0.00011246971220812347, "loss": 0.9629, "step": 560 }, { "epoch": 0.52, "learning_rate": 0.00011087783562773311, "loss": 0.974, "step": 565 }, { "epoch": 0.52, "learning_rate": 0.00010928316443999462, "loss": 0.9714, "step": 570 }, { "epoch": 0.53, "learning_rate": 0.00010768610832933168, "loss": 0.9445, "step": 575 }, { "epoch": 0.53, "learning_rate": 0.00010608707759287452, "loss": 0.923, "step": 580 }, { "epoch": 0.54, "learning_rate": 0.00010448648303505151, "loss": 0.964, "step": 585 }, { "epoch": 0.54, "learning_rate": 0.00010288473586204969, "loss": 0.9407, "step": 590 }, { "epoch": 0.55, "learning_rate": 0.00010128224757617274, "loss": 0.9515, "step": 595 }, { "epoch": 0.55, "learning_rate": 9.967942987012241e-05, "loss": 0.9607, "step": 600 }, { "epoch": 0.56, "learning_rate": 9.80766945212313e-05, "loss": 0.9721, "step": 605 }, { "epoch": 0.56, "learning_rate": 9.647445328567368e-05, "loss": 0.9406, "step": 610 }, { "epoch": 0.56, "learning_rate": 9.48731177926821e-05, "loss": 0.9357, "step": 615 }, { "epoch": 0.57, "learning_rate": 9.327309943879604e-05, "loss": 0.9591, "step": 620 }, { "epoch": 0.57, "learning_rate": 9.167480928217108e-05, "loss": 0.9318, "step": 625 }, { "epoch": 0.58, "learning_rate": 9.007865793697426e-05, "loss": 0.9444, "step": 630 }, { "epoch": 0.58, "learning_rate": 8.848505546789408e-05, "loss": 0.9385, "step": 635 }, { "epoch": 0.59, "learning_rate": 8.689441128479134e-05, "loss": 0.9512, "step": 640 }, { "epoch": 0.59, "learning_rate": 8.530713403751821e-05, "loss": 0.946, "step": 645 }, { "epoch": 0.6, "learning_rate": 8.372363151093301e-05, "loss": 0.9398, "step": 650 }, { "epoch": 0.6, "learning_rate": 8.214431052013634e-05, "loss": 0.975, "step": 655 }, { "epoch": 0.61, "learning_rate": 8.056957680595732e-05, "loss": 0.9479, "step": 660 }, { "epoch": 0.61, "learning_rate": 7.899983493071507e-05, "loss": 0.9494, "step": 665 }, { "epoch": 0.62, "learning_rate": 7.743548817428339e-05, "loss": 0.9154, "step": 670 }, { "epoch": 0.62, "learning_rate": 7.587693843048475e-05, "loss": 0.955, "step": 675 }, { "epoch": 0.62, "learning_rate": 7.432458610384036e-05, "loss": 0.9365, "step": 680 }, { "epoch": 0.63, "learning_rate": 7.27788300067029e-05, "loss": 0.9434, "step": 685 }, { "epoch": 0.63, "learning_rate": 7.124006725679828e-05, "loss": 0.9331, "step": 690 }, { "epoch": 0.64, "learning_rate": 6.97086931752028e-05, "loss": 0.9297, "step": 695 }, { "epoch": 0.64, "learning_rate": 6.818510118478172e-05, "loss": 0.929, "step": 700 }, { "epoch": 0.65, "learning_rate": 6.666968270911584e-05, "loss": 0.9449, "step": 705 }, { "epoch": 0.65, "learning_rate": 6.516282707194119e-05, "loss": 0.9447, "step": 710 }, { "epoch": 0.66, "learning_rate": 6.366492139712886e-05, "loss": 0.9712, "step": 715 }, { "epoch": 0.66, "learning_rate": 6.217635050922923e-05, "loss": 0.9211, "step": 720 }, { "epoch": 0.67, "learning_rate": 6.069749683460765e-05, "loss": 0.9443, "step": 725 }, { "epoch": 0.67, "learning_rate": 5.9228740303195674e-05, "loss": 0.9826, "step": 730 }, { "epoch": 0.67, "learning_rate": 5.777045825088404e-05, "loss": 0.9367, "step": 735 }, { "epoch": 0.68, "learning_rate": 5.632302532258169e-05, "loss": 0.9359, "step": 740 }, { "epoch": 0.68, "learning_rate": 5.488681337596653e-05, "loss": 0.9327, "step": 745 }, { "epoch": 0.69, "learning_rate": 5.346219138595214e-05, "loss": 0.9501, "step": 750 }, { "epoch": 0.69, "learning_rate": 5.2049525349894625e-05, "loss": 0.9323, "step": 755 }, { "epoch": 0.7, "learning_rate": 5.0649178193565314e-05, "loss": 0.9356, "step": 760 }, { "epoch": 0.7, "learning_rate": 4.92615096779118e-05, "loss": 0.9723, "step": 765 }, { "epoch": 0.71, "learning_rate": 4.788687630663231e-05, "loss": 0.9523, "step": 770 }, { "epoch": 0.71, "learning_rate": 4.6525631234587034e-05, "loss": 0.9674, "step": 775 }, { "epoch": 0.72, "learning_rate": 4.517812417706967e-05, "loss": 0.9296, "step": 780 }, { "epoch": 0.72, "learning_rate": 4.384470131996252e-05, "loss": 0.9476, "step": 785 }, { "epoch": 0.73, "learning_rate": 4.252570523079852e-05, "loss": 0.9567, "step": 790 }, { "epoch": 0.73, "learning_rate": 4.12214747707527e-05, "loss": 0.9572, "step": 795 }, { "epoch": 0.73, "learning_rate": 3.9932345007585966e-05, "loss": 0.9341, "step": 800 }, { "epoch": 0.74, "learning_rate": 3.8658647129563364e-05, "loss": 0.9423, "step": 805 }, { "epoch": 0.74, "learning_rate": 3.740070836036893e-05, "loss": 0.9522, "step": 810 }, { "epoch": 0.75, "learning_rate": 3.615885187503946e-05, "loss": 0.9347, "step": 815 }, { "epoch": 0.75, "learning_rate": 3.493339671693765e-05, "loss": 0.9561, "step": 820 }, { "epoch": 0.76, "learning_rate": 3.372465771578771e-05, "loss": 0.9385, "step": 825 }, { "epoch": 0.76, "learning_rate": 3.253294540679257e-05, "loss": 0.9706, "step": 830 }, { "epoch": 0.77, "learning_rate": 3.135856595085498e-05, "loss": 0.9703, "step": 835 }, { "epoch": 0.77, "learning_rate": 3.0201821055922098e-05, "loss": 0.9481, "step": 840 }, { "epoch": 0.78, "learning_rate": 2.9063007899474216e-05, "loss": 0.9647, "step": 845 }, { "epoch": 0.78, "learning_rate": 2.7942419052177525e-05, "loss": 0.9527, "step": 850 }, { "epoch": 0.79, "learning_rate": 2.6840342402719866e-05, "loss": 0.9357, "step": 855 }, { "epoch": 0.79, "learning_rate": 2.5757061083850154e-05, "loss": 0.9572, "step": 860 }, { "epoch": 0.79, "learning_rate": 2.4692853399638917e-05, "loss": 0.9648, "step": 865 }, { "epoch": 0.8, "learning_rate": 2.3647992753979696e-05, "loss": 0.9563, "step": 870 }, { "epoch": 0.8, "learning_rate": 2.2622747580349314e-05, "loss": 0.9409, "step": 875 }, { "epoch": 0.81, "learning_rate": 2.161738127284517e-05, "loss": 0.9248, "step": 880 }, { "epoch": 0.81, "learning_rate": 2.063215211851678e-05, "loss": 0.9449, "step": 885 }, { "epoch": 0.82, "learning_rate": 1.9667313231009953e-05, "loss": 0.9702, "step": 890 }, { "epoch": 0.82, "learning_rate": 1.872311248553974e-05, "loss": 0.9405, "step": 895 }, { "epoch": 0.83, "learning_rate": 1.7799792455209018e-05, "loss": 0.9599, "step": 900 }, { "epoch": 0.83, "learning_rate": 1.689759034868961e-05, "loss": 0.9307, "step": 905 }, { "epoch": 0.84, "learning_rate": 1.601673794928127e-05, "loss": 0.9432, "step": 910 }, { "epoch": 0.84, "learning_rate": 1.5157461555364772e-05, "loss": 0.9605, "step": 915 }, { "epoch": 0.84, "learning_rate": 1.4319981922263637e-05, "loss": 0.9364, "step": 920 }, { "epoch": 0.85, "learning_rate": 1.350451420553065e-05, "loss": 0.9538, "step": 925 }, { "epoch": 0.85, "learning_rate": 1.2711267905672231e-05, "loss": 0.9521, "step": 930 }, { "epoch": 0.86, "learning_rate": 1.19404468143262e-05, "loss": 0.9437, "step": 935 }, { "epoch": 0.86, "learning_rate": 1.1192248961905949e-05, "loss": 0.98, "step": 940 }, { "epoch": 0.87, "learning_rate": 1.0466866566724698e-05, "loss": 0.9283, "step": 945 }, { "epoch": 0.87, "learning_rate": 9.764485985613092e-06, "loss": 0.9563, "step": 950 }, { "epoch": 0.88, "learning_rate": 9.085287666042508e-06, "loss": 0.9499, "step": 955 }, { "epoch": 0.88, "learning_rate": 8.429446099766614e-06, "loss": 0.9462, "step": 960 }, { "epoch": 0.89, "learning_rate": 7.797129777992952e-06, "loss": 0.9435, "step": 965 }, { "epoch": 0.89, "learning_rate": 7.1885011480961164e-06, "loss": 0.9767, "step": 970 }, { "epoch": 0.9, "learning_rate": 6.603716571883689e-06, "loss": 0.952, "step": 975 }, { "epoch": 0.9, "learning_rate": 6.042926285425576e-06, "loss": 0.9489, "step": 980 }, { "epoch": 0.9, "learning_rate": 5.506274360457086e-06, "loss": 0.9431, "step": 985 }, { "epoch": 0.91, "learning_rate": 4.993898667365671e-06, "loss": 0.9069, "step": 990 }, { "epoch": 0.91, "learning_rate": 4.505930839770967e-06, "loss": 0.9383, "step": 995 }, { "epoch": 0.92, "learning_rate": 4.0424962407068166e-06, "loss": 0.9662, "step": 1000 }, { "epoch": 0.92, "learning_rate": 3.6037139304146762e-06, "loss": 0.9183, "step": 1005 }, { "epoch": 0.93, "learning_rate": 3.1896966357558675e-06, "loss": 0.9272, "step": 1010 }, { "epoch": 0.93, "learning_rate": 2.800550721251216e-06, "loss": 0.912, "step": 1015 }, { "epoch": 0.94, "learning_rate": 2.4363761617550053e-06, "loss": 0.9533, "step": 1020 }, { "epoch": 0.94, "learning_rate": 2.0972665167707126e-06, "loss": 0.9398, "step": 1025 }, { "epoch": 0.95, "learning_rate": 1.7833089064146824e-06, "loss": 0.9483, "step": 1030 }, { "epoch": 0.95, "learning_rate": 1.4945839890343261e-06, "loss": 0.9567, "step": 1035 }, { "epoch": 0.96, "learning_rate": 1.231165940486234e-06, "loss": 0.952, "step": 1040 }, { "epoch": 0.96, "learning_rate": 9.931224350798185e-07, "loss": 0.9575, "step": 1045 }, { "epoch": 0.96, "learning_rate": 7.805146281912201e-07, "loss": 0.9609, "step": 1050 }, { "epoch": 0.97, "learning_rate": 5.933971405519656e-07, "loss": 0.9366, "step": 1055 }, { "epoch": 0.97, "learning_rate": 4.3181804421645875e-07, "loss": 0.9812, "step": 1060 }, { "epoch": 0.98, "learning_rate": 2.9581885021181533e-07, "loss": 0.9549, "step": 1065 }, { "epoch": 0.98, "learning_rate": 1.8543449787338242e-07, "loss": 0.953, "step": 1070 }, { "epoch": 0.99, "learning_rate": 1.0069334586854107e-07, "loss": 0.9557, "step": 1075 }, { "epoch": 0.99, "learning_rate": 4.161716491105638e-08, "loss": 0.9659, "step": 1080 }, { "epoch": 1.0, "learning_rate": 8.221132168073631e-09, "loss": 0.9501, "step": 1085 }, { "epoch": 1.0, "eval_loss": 0.9492883682250977, "eval_runtime": 476.3253, "eval_samples_per_second": 16.144, "eval_steps_per_second": 1.01, "step": 1089 }, { "epoch": 1.0, "step": 1089, "total_flos": 6.389065773968523e+18, "train_loss": 0.9597073847049962, "train_runtime": 15464.7646, "train_samples_per_second": 4.504, "train_steps_per_second": 0.07 } ], "logging_steps": 5, "max_steps": 1089, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "total_flos": 6.389065773968523e+18, "train_batch_size": 32, "trial_name": null, "trial_params": null }