{ "best_metric": null, "best_model_checkpoint": null, "epoch": 48.63813229571984, "eval_steps": 500, "global_step": 12500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_bp": 0.4559528592458481, "eval_counts": [ 3210, 1930, 1488, 1066 ], "eval_loss": 2.412109375, "eval_precisions": [ 71.17516629711751, 48.44377510040161, 43.03065355696935, 36.24617477048623 ], "eval_ref_len": 8052, "eval_runtime": 118.4312, "eval_samples_per_second": 4.441, "eval_score": 21.9569286964753, "eval_steps_per_second": 0.279, "eval_sys_len": 4510, "eval_totals": [ 4510, 3984, 3458, 2941 ], "step": 257 }, { "epoch": 1.95, "learning_rate": 1.9221789883268484e-05, "loss": 2.8948, "step": 500 }, { "epoch": 2.0, "eval_bp": 0.43532380297987505, "eval_counts": [ 1708, 541, 376, 228 ], "eval_loss": 1.5029296875, "eval_precisions": [ 38.853503184713375, 13.979328165374676, 11.24401913875598, 7.497533706017757 ], "eval_ref_len": 8052, "eval_runtime": 115.6212, "eval_samples_per_second": 4.549, "eval_score": 6.3679777301051494, "eval_steps_per_second": 0.285, "eval_sys_len": 4396, "eval_totals": [ 4396, 3870, 3344, 3041 ], "step": 514 }, { "epoch": 3.0, "eval_bp": 0.5596896112039585, "eval_counts": [ 2795, 1858, 1416, 991 ], "eval_loss": 0.65576171875, "eval_precisions": [ 54.85770363101079, 40.66535346903042, 35.02349740291862, 28.177423940858688 ], "eval_ref_len": 8052, "eval_runtime": 115.5065, "eval_samples_per_second": 4.554, "eval_score": 21.558969055160425, "eval_steps_per_second": 0.286, "eval_sys_len": 5095, "eval_totals": [ 5095, 4569, 4043, 3517 ], "step": 771 }, { "epoch": 3.89, "learning_rate": 1.8443579766536967e-05, "loss": 0.8924, "step": 1000 }, { "epoch": 4.0, "eval_bp": 0.6650198090145658, "eval_counts": [ 3257, 2161, 1704, 1256 ], "eval_loss": 0.485107421875, "eval_precisions": [ 56.950515824444835, 41.61371076449066, 36.51167773730448, 30.330837961844964 ], "eval_ref_len": 8052, "eval_runtime": 115.5544, "eval_samples_per_second": 4.552, "eval_score": 26.766843398496384, "eval_steps_per_second": 0.286, "eval_sys_len": 5719, "eval_totals": [ 5719, 5193, 4667, 4141 ], "step": 1028 }, { "epoch": 5.0, "eval_bp": 0.41788118238391686, "eval_counts": [ 3699, 2841, 2368, 1900 ], "eval_loss": 0.293701171875, "eval_precisions": [ 86.02325581395348, 75.27821939586646, 72.9064039408867, 69.80161645848641 ], "eval_ref_len": 8052, "eval_runtime": 115.1699, "eval_samples_per_second": 4.567, "eval_score": 31.661530724736487, "eval_steps_per_second": 0.287, "eval_sys_len": 4300, "eval_totals": [ 4300, 3774, 3248, 2722 ], "step": 1285 }, { "epoch": 5.84, "learning_rate": 1.766536964980545e-05, "loss": 0.4295, "step": 1500 }, { "epoch": 6.0, "eval_bp": 0.4089581075583404, "eval_counts": [ 3783, 2928, 2446, 1971 ], "eval_loss": 0.2445068359375, "eval_precisions": [ 88.9908256880734, 78.60402684563758, 76.46139418568302, 73.73737373737374 ], "eval_ref_len": 8052, "eval_runtime": 115.449, "eval_samples_per_second": 4.556, "eval_score": 32.408490251125635, "eval_steps_per_second": 0.286, "eval_sys_len": 4251, "eval_totals": [ 4251, 3725, 3199, 2673 ], "step": 1542 }, { "epoch": 7.0, "eval_bp": 0.417153226107242, "eval_counts": [ 3818, 2965, 2480, 2002 ], "eval_loss": 0.22021484375, "eval_precisions": [ 88.8733705772812, 78.64721485411141, 76.44882860665845, 73.6571008094187 ], "eval_ref_len": 8052, "eval_runtime": 115.4073, "eval_samples_per_second": 4.558, "eval_score": 33.04119304311829, "eval_steps_per_second": 0.286, "eval_sys_len": 4296, "eval_totals": [ 4296, 3770, 3244, 2718 ], "step": 1799 }, { "epoch": 7.78, "learning_rate": 1.6887159533073932e-05, "loss": 0.2991, "step": 2000 }, { "epoch": 8.0, "eval_bp": 0.42679131632296613, "eval_counts": [ 3874, 3019, 2524, 2038 ], "eval_loss": 0.2076416015625, "eval_precisions": [ 89.07794895378248, 78.9693957624902, 76.55444343342432, 73.54745579213281 ], "eval_ref_len": 8052, "eval_runtime": 114.0107, "eval_samples_per_second": 4.614, "eval_score": 33.85769159645968, "eval_steps_per_second": 0.289, "eval_sys_len": 4349, "eval_totals": [ 4349, 3823, 3297, 2771 ], "step": 2056 }, { "epoch": 9.0, "eval_bp": 0.46028228872696303, "eval_counts": [ 4065, 3225, 2700, 2186 ], "eval_loss": 0.1663818359375, "eval_precisions": [ 89.65593295103662, 80.46407185628742, 77.54164273406089, 73.9512855209743 ], "eval_ref_len": 8052, "eval_runtime": 114.8195, "eval_samples_per_second": 4.581, "eval_score": 36.91388078489759, "eval_steps_per_second": 0.287, "eval_sys_len": 4534, "eval_totals": [ 4534, 4008, 3482, 2956 ], "step": 2313 }, { "epoch": 9.73, "learning_rate": 1.6108949416342414e-05, "loss": 0.2277, "step": 2500 }, { "epoch": 10.0, "eval_bp": 0.5426087135017283, "eval_counts": [ 4419, 3611, 3062, 2525 ], "eval_loss": 0.1044921875, "eval_precisions": [ 88.43305983590155, 80.76492954596287, 77.617237008872, 73.85200350979818 ], "eval_ref_len": 8052, "eval_runtime": 114.7152, "eval_samples_per_second": 4.585, "eval_score": 43.40364936643555, "eval_steps_per_second": 0.288, "eval_sys_len": 4997, "eval_totals": [ 4997, 4471, 3945, 3419 ], "step": 2570 }, { "epoch": 11.0, "eval_bp": 0.5841943959505824, "eval_counts": [ 4717, 3950, 3372, 2808 ], "eval_loss": 0.08892822265625, "eval_precisions": [ 90.07065113614665, 83.846317130121, 80.57347670250896, 76.74227931128723 ], "eval_ref_len": 8052, "eval_runtime": 113.7863, "eval_samples_per_second": 4.623, "eval_score": 48.29263576279789, "eval_steps_per_second": 0.29, "eval_sys_len": 5237, "eval_totals": [ 5237, 4711, 4185, 3659 ], "step": 2827 }, { "epoch": 11.67, "learning_rate": 1.5330739299610897e-05, "loss": 0.1405, "step": 3000 }, { "epoch": 12.0, "eval_bp": 0.5586477230994942, "eval_counts": [ 4630, 3875, 3303, 2749 ], "eval_loss": 0.08489990234375, "eval_precisions": [ 90.98054627628218, 84.9222003068157, 81.81818181818181, 78.29678154371973 ], "eval_ref_len": 8052, "eval_runtime": 107.8868, "eval_samples_per_second": 4.875, "eval_score": 46.85747814062412, "eval_steps_per_second": 0.306, "eval_sys_len": 5089, "eval_totals": [ 5089, 4563, 4037, 3511 ], "step": 3084 }, { "epoch": 13.0, "eval_bp": 0.5695616786732568, "eval_counts": [ 4747, 4034, 3464, 2904 ], "eval_loss": 0.08123779296875, "eval_precisions": [ 92.1389751552795, 87.20276696930394, 84.48780487804878, 81.2534974818131 ], "eval_ref_len": 8052, "eval_runtime": 108.2759, "eval_samples_per_second": 4.858, "eval_score": 49.08436700451685, "eval_steps_per_second": 0.305, "eval_sys_len": 5152, "eval_totals": [ 5152, 4626, 4100, 3574 ], "step": 3341 }, { "epoch": 13.62, "learning_rate": 1.4552529182879378e-05, "loss": 0.1241, "step": 3500 }, { "epoch": 14.0, "eval_bp": 0.5695616786732568, "eval_counts": [ 4738, 4024, 3452, 2894 ], "eval_loss": 0.07525634765625, "eval_precisions": [ 91.96428571428571, 86.98659749243407, 84.1951219512195, 80.97369893676553 ], "eval_ref_len": 8052, "eval_runtime": 107.2784, "eval_samples_per_second": 4.903, "eval_score": 48.94590635934059, "eval_steps_per_second": 0.308, "eval_sys_len": 5152, "eval_totals": [ 5152, 4626, 4100, 3574 ], "step": 3598 }, { "epoch": 15.0, "eval_bp": 0.5961628688829712, "eval_counts": [ 4741, 4006, 3444, 2891 ], "eval_loss": 0.07562255859375, "eval_precisions": [ 89.33484077633314, 83.79000209161264, 80.94007050528789, 77.52748726200053 ], "eval_ref_len": 8052, "eval_runtime": 106.2867, "eval_samples_per_second": 4.949, "eval_score": 49.354074470195435, "eval_steps_per_second": 0.31, "eval_sys_len": 5307, "eval_totals": [ 5307, 4781, 4255, 3729 ], "step": 3855 }, { "epoch": 15.56, "learning_rate": 1.377431906614786e-05, "loss": 0.1147, "step": 4000 }, { "epoch": 16.0, "eval_bp": 0.5655838797151567, "eval_counts": [ 4743, 4039, 3477, 2925 ], "eval_loss": 0.06915283203125, "eval_precisions": [ 92.47416650419186, 87.74712144253748, 85.28329654157469, 82.37116305266122 ], "eval_ref_len": 8052, "eval_runtime": 107.836, "eval_samples_per_second": 4.878, "eval_score": 49.143959340541095, "eval_steps_per_second": 0.306, "eval_sys_len": 5129, "eval_totals": [ 5129, 4603, 4077, 3551 ], "step": 4112 }, { "epoch": 17.0, "eval_bp": 0.5932631592602093, "eval_counts": [ 4727, 3996, 3439, 2892 ], "eval_loss": 0.070068359375, "eval_precisions": [ 89.35727788279773, 83.87909319899245, 81.14676734308637, 77.90948275862068 ], "eval_ref_len": 8052, "eval_runtime": 106.6266, "eval_samples_per_second": 4.933, "eval_score": 49.221934768405774, "eval_steps_per_second": 0.309, "eval_sys_len": 5290, "eval_totals": [ 5290, 4764, 4238, 3712 ], "step": 4369 }, { "epoch": 17.51, "learning_rate": 1.2996108949416343e-05, "loss": 0.1065, "step": 4500 }, { "epoch": 18.0, "eval_bp": 0.5610779943992972, "eval_counts": [ 4753, 4064, 3505, 2956 ], "eval_loss": 0.0623779296875, "eval_precisions": [ 93.14128943758574, 88.7917850120166, 86.52184645766478, 83.8581560283688 ], "eval_ref_len": 8052, "eval_runtime": 106.949, "eval_samples_per_second": 4.918, "eval_score": 49.382124037917905, "eval_steps_per_second": 0.309, "eval_sys_len": 5103, "eval_totals": [ 5103, 4577, 4051, 3525 ], "step": 4626 }, { "epoch": 19.0, "eval_bp": 0.5788702549376445, "eval_counts": [ 4784, 4087, 3529, 2977 ], "eval_loss": 0.060699462890625, "eval_precisions": [ 91.89396849788706, 87.32905982905983, 84.95426095329803, 82.05622932745314 ], "eval_ref_len": 8052, "eval_runtime": 106.4771, "eval_samples_per_second": 4.94, "eval_score": 50.0629990425284, "eval_steps_per_second": 0.31, "eval_sys_len": 5206, "eval_totals": [ 5206, 4680, 4154, 3628 ], "step": 4883 }, { "epoch": 19.46, "learning_rate": 1.2217898832684827e-05, "loss": 0.0964, "step": 5000 }, { "epoch": 20.0, "eval_bp": 0.5826501698750266, "eval_counts": [ 4773, 4068, 3509, 2957 ], "eval_loss": 0.0595703125, "eval_precisions": [ 91.29686304514155, 86.51637601020842, 84.02777777777777, 81.01369863013699 ], "eval_ref_len": 8052, "eval_runtime": 106.5688, "eval_samples_per_second": 4.936, "eval_score": 49.89324555557292, "eval_steps_per_second": 0.31, "eval_sys_len": 5228, "eval_totals": [ 5228, 4702, 4176, 3650 ], "step": 5140 }, { "epoch": 21.0, "eval_bp": 0.5824785136401668, "eval_counts": [ 4780, 4078, 3521, 2972 ], "eval_loss": 0.057952880859375, "eval_precisions": [ 91.4482494738856, 86.74750053180175, 84.33532934131736, 81.44697177308852 ], "eval_ref_len": 8052, "eval_runtime": 105.7144, "eval_samples_per_second": 4.976, "eval_score": 50.04481904653482, "eval_steps_per_second": 0.312, "eval_sys_len": 5227, "eval_totals": [ 5227, 4701, 4175, 3649 ], "step": 5397 }, { "epoch": 21.4, "learning_rate": 1.1439688715953308e-05, "loss": 0.0925, "step": 5500 }, { "epoch": 22.0, "eval_bp": 0.6060221334079605, "eval_counts": [ 4800, 4076, 3514, 2962 ], "eval_loss": 0.060546875, "eval_precisions": [ 89.46877912395153, 84.23227939656954, 81.47461163923023, 78.21494586744124 ], "eval_ref_len": 8052, "eval_runtime": 105.9417, "eval_samples_per_second": 4.965, "eval_score": 50.4491686657978, "eval_steps_per_second": 0.311, "eval_sys_len": 5365, "eval_totals": [ 5365, 4839, 4313, 3787 ], "step": 5654 }, { "epoch": 23.0, "eval_bp": 0.5761166700049626, "eval_counts": [ 4832, 4155, 3593, 3036 ], "eval_loss": 0.053558349609375, "eval_precisions": [ 93.10211946050096, 89.08662092624357, 86.82938617689705, 84.0531561461794 ], "eval_ref_len": 8052, "eval_runtime": 106.564, "eval_samples_per_second": 4.936, "eval_score": 50.81695573260325, "eval_steps_per_second": 0.31, "eval_sys_len": 5190, "eval_totals": [ 5190, 4664, 4138, 3612 ], "step": 5911 }, { "epoch": 23.35, "learning_rate": 1.066147859922179e-05, "loss": 0.0871, "step": 6000 }, { "epoch": 24.0, "eval_bp": 0.5719791556804446, "eval_counts": [ 4807, 4125, 3565, 3012 ], "eval_loss": 0.052276611328125, "eval_precisions": [ 93.05071622144793, 88.90086206896552, 86.65532328633932, 83.94648829431438 ], "eval_ref_len": 8052, "eval_runtime": 107.1172, "eval_samples_per_second": 4.911, "eval_score": 50.37743722047891, "eval_steps_per_second": 0.308, "eval_sys_len": 5166, "eval_totals": [ 5166, 4640, 4114, 3588 ], "step": 6168 }, { "epoch": 25.0, "eval_bp": 0.579214183971878, "eval_counts": [ 4838, 4161, 3602, 3050 ], "eval_loss": 0.050567626953125, "eval_precisions": [ 92.89554531490015, 88.87227680478428, 86.66987487969202, 84.02203856749311 ], "eval_ref_len": 8052, "eval_runtime": 106.5446, "eval_samples_per_second": 4.937, "eval_score": 51.0028956058344, "eval_steps_per_second": 0.31, "eval_sys_len": 5208, "eval_totals": [ 5208, 4682, 4156, 3630 ], "step": 6425 }, { "epoch": 25.29, "learning_rate": 9.883268482490273e-06, "loss": 0.0843, "step": 6500 }, { "epoch": 26.0, "eval_bp": 0.5607309845734951, "eval_counts": [ 4817, 4157, 3596, 3042 ], "eval_loss": 0.051177978515625, "eval_precisions": [ 94.43246422270143, 90.86338797814207, 88.81205235860706, 86.34686346863468 ], "eval_ref_len": 8052, "eval_runtime": 107.5728, "eval_samples_per_second": 4.89, "eval_score": 50.50236718840154, "eval_steps_per_second": 0.307, "eval_sys_len": 5101, "eval_totals": [ 5101, 4575, 4049, 3523 ], "step": 6682 }, { "epoch": 27.0, "eval_bp": 0.5869366550146455, "eval_counts": [ 4855, 4170, 3608, 3055 ], "eval_loss": 0.0489501953125, "eval_precisions": [ 92.42337711783743, 88.21662788237784, 85.88431325874792, 83.12925170068027 ], "eval_ref_len": 8052, "eval_runtime": 109.9833, "eval_samples_per_second": 4.783, "eval_score": 51.26739301541927, "eval_steps_per_second": 0.3, "eval_sys_len": 5253, "eval_totals": [ 5253, 4727, 4201, 3675 ], "step": 6939 }, { "epoch": 27.24, "learning_rate": 9.105058365758756e-06, "loss": 0.0813, "step": 7000 }, { "epoch": 28.0, "eval_bp": 0.5641984935077309, "eval_counts": [ 4838, 4184, 3624, 3070 ], "eval_loss": 0.047760009765625, "eval_precisions": [ 94.47373559851592, 91.05549510337323, 89.06365200294913, 86.64973186565058 ], "eval_ref_len": 8052, "eval_runtime": 109.0318, "eval_samples_per_second": 4.824, "eval_score": 50.9275946797506, "eval_steps_per_second": 0.303, "eval_sys_len": 5121, "eval_totals": [ 5121, 4595, 4069, 3543 ], "step": 7196 }, { "epoch": 29.0, "eval_bp": 0.5711161019095474, "eval_counts": [ 4838, 4179, 3625, 3079 ], "eval_loss": 0.0462646484375, "eval_precisions": [ 93.74152296066654, 90.16181229773463, 88.22097834022877, 85.93357521629919 ], "eval_ref_len": 8052, "eval_runtime": 107.2897, "eval_samples_per_second": 4.903, "eval_score": 51.09715872720289, "eval_steps_per_second": 0.308, "eval_sys_len": 5161, "eval_totals": [ 5161, 4635, 4109, 3583 ], "step": 7453 }, { "epoch": 29.18, "learning_rate": 8.326848249027239e-06, "loss": 0.0778, "step": 7500 }, { "epoch": 30.0, "eval_bp": 0.587279163676868, "eval_counts": [ 4863, 4185, 3626, 3075 ], "eval_loss": 0.04534912109375, "eval_precisions": [ 92.54043767840152, 88.49651089025164, 86.27171068284558, 83.62795757410933 ], "eval_ref_len": 8052, "eval_runtime": 108.186, "eval_samples_per_second": 4.862, "eval_score": 51.488944843891275, "eval_steps_per_second": 0.305, "eval_sys_len": 5255, "eval_totals": [ 5255, 4729, 4203, 3677 ], "step": 7710 }, { "epoch": 31.0, "eval_bp": 0.587279163676868, "eval_counts": [ 4847, 4168, 3612, 3064 ], "eval_loss": 0.044677734375, "eval_precisions": [ 92.23596574690771, 88.137026855572, 85.93861527480371, 83.32880065270601 ], "eval_ref_len": 8052, "eval_runtime": 106.6208, "eval_samples_per_second": 4.933, "eval_score": 51.298555626377826, "eval_steps_per_second": 0.31, "eval_sys_len": 5255, "eval_totals": [ 5255, 4729, 4203, 3677 ], "step": 7967 }, { "epoch": 31.13, "learning_rate": 7.54863813229572e-06, "loss": 0.0753, "step": 8000 }, { "epoch": 32.0, "eval_bp": 0.5690432735111319, "eval_counts": [ 4866, 4219, 3661, 3111 ], "eval_loss": 0.0438232421875, "eval_precisions": [ 94.50378714313459, 91.26108587497296, 89.35806687820356, 87.11845421450575 ], "eval_ref_len": 8052, "eval_runtime": 107.5192, "eval_samples_per_second": 4.892, "eval_score": 51.50981459551784, "eval_steps_per_second": 0.307, "eval_sys_len": 5149, "eval_totals": [ 5149, 4623, 4097, 3571 ], "step": 8224 }, { "epoch": 33.0, "eval_bp": 0.5881351685074624, "eval_counts": [ 4869, 4201, 3645, 3097 ], "eval_loss": 0.04400634765625, "eval_precisions": [ 92.56653992395437, 88.74102239121251, 86.62072243346007, 84.11189570885388 ], "eval_ref_len": 8052, "eval_runtime": 106.8688, "eval_samples_per_second": 4.922, "eval_score": 51.729891771805434, "eval_steps_per_second": 0.309, "eval_sys_len": 5260, "eval_totals": [ 5260, 4734, 4208, 3682 ], "step": 8481 }, { "epoch": 33.07, "learning_rate": 6.770428015564204e-06, "loss": 0.0714, "step": 8500 }, { "epoch": 34.0, "eval_bp": 0.5823068423133116, "eval_counts": [ 4881, 4226, 3674, 3130 ], "eval_loss": 0.041656494140625, "eval_precisions": [ 93.398392652124, 89.91489361702128, 88.02108289410637, 85.80043859649123 ], "eval_ref_len": 8052, "eval_runtime": 107.0835, "eval_samples_per_second": 4.912, "eval_score": 51.96533200156475, "eval_steps_per_second": 0.308, "eval_sys_len": 5226, "eval_totals": [ 5226, 4700, 4174, 3648 ], "step": 8738 }, { "epoch": 35.0, "eval_bp": 0.5862514549555176, "eval_counts": [ 4902, 4242, 3685, 3133 ], "eval_loss": 0.042633056640625, "eval_precisions": [ 93.38921699371309, 89.81579504552191, 87.80081010245414, 85.34459275401798 ], "eval_ref_len": 8052, "eval_runtime": 106.8404, "eval_samples_per_second": 4.923, "eval_score": 52.19933016750815, "eval_steps_per_second": 0.309, "eval_sys_len": 5249, "eval_totals": [ 5249, 4723, 4197, 3671 ], "step": 8995 }, { "epoch": 35.02, "learning_rate": 5.992217898832685e-06, "loss": 0.0697, "step": 9000 }, { "epoch": 36.0, "eval_bp": 0.5807611221368078, "eval_counts": [ 4907, 4257, 3699, 3149 ], "eval_loss": 0.04095458984375, "eval_precisions": [ 94.05788767490895, 90.74824131315285, 88.81152460984394, 86.53476229733444 ], "eval_ref_len": 8052, "eval_runtime": 106.3976, "eval_samples_per_second": 4.944, "eval_score": 52.266194224133834, "eval_steps_per_second": 0.31, "eval_sys_len": 5217, "eval_totals": [ 5217, 4691, 4165, 3639 ], "step": 9252 }, { "epoch": 36.96, "learning_rate": 5.214007782101168e-06, "loss": 0.0686, "step": 9500 }, { "epoch": 37.0, "eval_bp": 0.5983772718445015, "eval_counts": [ 4899, 4227, 3672, 3123 ], "eval_loss": 0.042388916015625, "eval_precisions": [ 92.08646616541354, 88.17271589486859, 86.03561387066541, 83.45804382683058 ], "eval_ref_len": 8052, "eval_runtime": 106.906, "eval_samples_per_second": 4.92, "eval_score": 52.28705860616529, "eval_steps_per_second": 0.309, "eval_sys_len": 5320, "eval_totals": [ 5320, 4794, 4268, 3742 ], "step": 9509 }, { "epoch": 38.0, "eval_bp": 0.5780101703802235, "eval_counts": [ 4913, 4273, 3718, 3172 ], "eval_loss": 0.0394287109375, "eval_precisions": [ 94.46260334551047, 91.40106951871658, 89.61195468787659, 87.55175269113994 ], "eval_ref_len": 8052, "eval_runtime": 106.7621, "eval_samples_per_second": 4.927, "eval_score": 52.43798269679689, "eval_steps_per_second": 0.309, "eval_sys_len": 5201, "eval_totals": [ 5201, 4675, 4149, 3623 ], "step": 9766 }, { "epoch": 38.91, "learning_rate": 4.43579766536965e-06, "loss": 0.0664, "step": 10000 }, { "epoch": 39.0, "eval_bp": 0.5975258891581067, "eval_counts": [ 4912, 4243, 3689, 3141 ], "eval_loss": 0.040374755859375, "eval_precisions": [ 92.41768579492003, 88.59887241595322, 86.5353037766831, 84.05137811078406 ], "eval_ref_len": 8052, "eval_runtime": 106.9692, "eval_samples_per_second": 4.917, "eval_score": 52.491270926490635, "eval_steps_per_second": 0.309, "eval_sys_len": 5315, "eval_totals": [ 5315, 4789, 4263, 3737 ], "step": 10023 }, { "epoch": 40.0, "eval_bp": 0.587279163676868, "eval_counts": [ 4913, 4259, 3711, 3170 ], "eval_loss": 0.0382080078125, "eval_precisions": [ 93.4919124643197, 90.0613237470924, 88.29407566024268, 86.21158553168344 ], "eval_ref_len": 8052, "eval_runtime": 107.0316, "eval_samples_per_second": 4.914, "eval_score": 52.5468859983503, "eval_steps_per_second": 0.308, "eval_sys_len": 5255, "eval_totals": [ 5255, 4729, 4203, 3677 ], "step": 10280 }, { "epoch": 40.86, "learning_rate": 3.6575875486381323e-06, "loss": 0.0658, "step": 10500 }, { "epoch": 41.0, "eval_bp": 0.5811047209098391, "eval_counts": [ 4921, 4278, 3725, 3179 ], "eval_loss": 0.0377197265625, "eval_precisions": [ 94.29009388771796, 91.1570424035798, 89.39284857211423, 87.31117824773413 ], "eval_ref_len": 8052, "eval_runtime": 106.7065, "eval_samples_per_second": 4.929, "eval_score": 52.59102479681527, "eval_steps_per_second": 0.309, "eval_sys_len": 5219, "eval_totals": [ 5219, 4693, 4167, 3641 ], "step": 10537 }, { "epoch": 42.0, "eval_bp": 0.5817917378355022, "eval_counts": [ 4908, 4261, 3712, 3169 ], "eval_loss": 0.037109375, "eval_precisions": [ 93.96898334290637, 90.7174792420694, 88.99544473747302, 86.94101508916324 ], "eval_ref_len": 8052, "eval_runtime": 107.4197, "eval_samples_per_second": 4.897, "eval_score": 52.43056600057888, "eval_steps_per_second": 0.307, "eval_sys_len": 5223, "eval_totals": [ 5223, 4697, 4171, 3645 ], "step": 10794 }, { "epoch": 42.8, "learning_rate": 2.879377431906615e-06, "loss": 0.0643, "step": 11000 }, { "epoch": 43.0, "eval_bp": 0.5804174632159932, "eval_counts": [ 4905, 4264, 3714, 3172 ], "eval_loss": 0.037017822265625, "eval_precisions": [ 94.0556088207095, 90.936233738537, 89.21450876771559, 87.21473742095134 ], "eval_ref_len": 8052, "eval_runtime": 106.7676, "eval_samples_per_second": 4.927, "eval_score": 52.42364666449266, "eval_steps_per_second": 0.309, "eval_sys_len": 5215, "eval_totals": [ 5215, 4689, 4163, 3637 ], "step": 11051 }, { "epoch": 44.0, "eval_bp": 0.5961628688829712, "eval_counts": [ 4930, 4270, 3718, 3173 ], "eval_loss": 0.0380859375, "eval_precisions": [ 92.89617486338798, 89.31185944363104, 87.37955346650999, 85.08983641727005 ], "eval_ref_len": 8052, "eval_runtime": 107.4748, "eval_samples_per_second": 4.894, "eval_score": 52.834006019511406, "eval_steps_per_second": 0.307, "eval_sys_len": 5307, "eval_totals": [ 5307, 4781, 4255, 3729 ], "step": 11308 }, { "epoch": 44.75, "learning_rate": 2.1011673151750974e-06, "loss": 0.0608, "step": 11500 }, { "epoch": 45.0, "eval_bp": 0.5757722034899391, "eval_counts": [ 4915, 4280, 3729, 3186 ], "eval_loss": 0.036224365234375, "eval_precisions": [ 94.7378565921357, 91.8060918060918, 90.15957446808511, 88.25484764542936 ], "eval_ref_len": 8052, "eval_runtime": 107.743, "eval_samples_per_second": 4.882, "eval_score": 52.515446703245765, "eval_steps_per_second": 0.306, "eval_sys_len": 5188, "eval_totals": [ 5188, 4662, 4136, 3610 ], "step": 11565 }, { "epoch": 46.0, "eval_bp": 0.5843659009664612, "eval_counts": [ 4924, 4278, 3730, 3188 ], "eval_loss": 0.036651611328125, "eval_precisions": [ 94.0053455517373, 90.78947368421052, 89.10654562828476, 87.10382513661202 ], "eval_ref_len": 8052, "eval_runtime": 107.889, "eval_samples_per_second": 4.875, "eval_score": 52.71917275684773, "eval_steps_per_second": 0.306, "eval_sys_len": 5238, "eval_totals": [ 5238, 4712, 4186, 3660 ], "step": 11822 }, { "epoch": 46.69, "learning_rate": 1.32295719844358e-06, "loss": 0.0622, "step": 12000 }, { "epoch": 47.0, "eval_bp": 0.586080116901772, "eval_counts": [ 4938, 4295, 3745, 3201 ], "eval_loss": 0.036529541015625, "eval_precisions": [ 94.09298780487805, 90.95722151630665, 89.2516682554814, 87.22070844686648 ], "eval_ref_len": 8052, "eval_runtime": 109.2986, "eval_samples_per_second": 4.813, "eval_score": 52.949832085516945, "eval_steps_per_second": 0.302, "eval_sys_len": 5248, "eval_totals": [ 5248, 4722, 4196, 3670 ], "step": 12079 }, { "epoch": 48.0, "eval_bp": 0.5817917378355022, "eval_counts": [ 4925, 4285, 3733, 3189 ], "eval_loss": 0.036285400390625, "eval_precisions": [ 94.29446678154318, 91.22844368746009, 89.49892112203308, 87.48971193415638 ], "eval_ref_len": 8052, "eval_runtime": 108.6659, "eval_samples_per_second": 4.841, "eval_score": 52.70664408353883, "eval_steps_per_second": 0.304, "eval_sys_len": 5223, "eval_totals": [ 5223, 4697, 4171, 3645 ], "step": 12336 }, { "epoch": 48.64, "learning_rate": 5.447470817120623e-07, "loss": 0.0625, "step": 12500 } ], "logging_steps": 500, "max_steps": 12850, "num_input_tokens_seen": 0, "num_train_epochs": 50, "save_steps": 500, "total_flos": 8.680648839008256e+16, "train_batch_size": 16, "trial_name": null, "trial_params": null }