{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 438, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 1.4285714285714285e-05, "loss": 1.7674, "step": 1 }, { "epoch": 0.0, "learning_rate": 2.857142857142857e-05, "loss": 1.479, "step": 2 }, { "epoch": 0.01, "learning_rate": 4.2857142857142856e-05, "loss": 1.6796, "step": 3 }, { "epoch": 0.01, "learning_rate": 5.714285714285714e-05, "loss": 1.6019, "step": 4 }, { "epoch": 0.01, "learning_rate": 7.142857142857143e-05, "loss": 1.5698, "step": 5 }, { "epoch": 0.01, "learning_rate": 8.571428571428571e-05, "loss": 1.3872, "step": 6 }, { "epoch": 0.02, "learning_rate": 0.0001, "loss": 1.3398, "step": 7 }, { "epoch": 0.02, "learning_rate": 0.00011428571428571428, "loss": 1.4532, "step": 8 }, { "epoch": 0.02, "learning_rate": 0.00012857142857142858, "loss": 1.3419, "step": 9 }, { "epoch": 0.02, "learning_rate": 0.00014285714285714287, "loss": 1.3282, "step": 10 }, { "epoch": 0.03, "learning_rate": 0.00015714285714285716, "loss": 1.4063, "step": 11 }, { "epoch": 0.03, "learning_rate": 0.00017142857142857143, "loss": 1.3195, "step": 12 }, { "epoch": 0.03, "learning_rate": 0.00018571428571428572, "loss": 1.1619, "step": 13 }, { "epoch": 0.03, "learning_rate": 0.0002, "loss": 1.4086, "step": 14 }, { "epoch": 0.03, "learning_rate": 0.00019999725503981394, "loss": 1.4226, "step": 15 }, { "epoch": 0.04, "learning_rate": 0.0001999890203099519, "loss": 1.2124, "step": 16 }, { "epoch": 0.04, "learning_rate": 0.00019997529626249397, "loss": 1.1834, "step": 17 }, { "epoch": 0.04, "learning_rate": 0.00019995608365087946, "loss": 1.4668, "step": 18 }, { "epoch": 0.04, "learning_rate": 0.00019993138352986537, "loss": 1.2505, "step": 19 }, { "epoch": 0.05, "learning_rate": 0.0001999011972554688, "loss": 1.3826, "step": 20 }, { "epoch": 0.05, "learning_rate": 0.00019986552648489208, "loss": 1.3494, "step": 21 }, { "epoch": 0.05, "learning_rate": 0.00019982437317643217, "loss": 1.2315, "step": 22 }, { "epoch": 0.05, "learning_rate": 0.00019977773958937287, "loss": 1.3979, "step": 23 }, { "epoch": 0.05, "learning_rate": 0.00019972562828386102, "loss": 1.1124, "step": 24 }, { "epoch": 0.06, "learning_rate": 0.00019966804212076582, "loss": 1.4637, "step": 25 }, { "epoch": 0.06, "learning_rate": 0.0001996049842615217, "loss": 1.2264, "step": 26 }, { "epoch": 0.06, "learning_rate": 0.00019953645816795499, "loss": 1.2921, "step": 27 }, { "epoch": 0.06, "learning_rate": 0.00019946246760209362, "loss": 1.2341, "step": 28 }, { "epoch": 0.07, "learning_rate": 0.00019938301662596075, "loss": 1.2676, "step": 29 }, { "epoch": 0.07, "learning_rate": 0.00019929810960135172, "loss": 1.2557, "step": 30 }, { "epoch": 0.07, "learning_rate": 0.00019920775118959454, "loss": 1.2751, "step": 31 }, { "epoch": 0.07, "learning_rate": 0.0001991119463512941, "loss": 1.1965, "step": 32 }, { "epoch": 0.08, "learning_rate": 0.0001990107003460597, "loss": 1.2291, "step": 33 }, { "epoch": 0.08, "learning_rate": 0.0001989040187322164, "loss": 1.2535, "step": 34 }, { "epoch": 0.08, "learning_rate": 0.00019879190736649992, "loss": 1.3206, "step": 35 }, { "epoch": 0.08, "learning_rate": 0.00019867437240373489, "loss": 1.3678, "step": 36 }, { "epoch": 0.08, "learning_rate": 0.00019855142029649722, "loss": 1.2816, "step": 37 }, { "epoch": 0.09, "learning_rate": 0.00019842305779475968, "loss": 1.379, "step": 38 }, { "epoch": 0.09, "learning_rate": 0.00019828929194552143, "loss": 1.2207, "step": 39 }, { "epoch": 0.09, "learning_rate": 0.00019815013009242104, "loss": 1.2713, "step": 40 }, { "epoch": 0.09, "learning_rate": 0.0001980055798753334, "loss": 1.2709, "step": 41 }, { "epoch": 0.1, "learning_rate": 0.0001978556492299504, "loss": 1.249, "step": 42 }, { "epoch": 0.1, "learning_rate": 0.00019770034638734506, "loss": 1.5568, "step": 43 }, { "epoch": 0.1, "learning_rate": 0.00019753967987351978, "loss": 1.256, "step": 44 }, { "epoch": 0.1, "learning_rate": 0.0001973736585089382, "loss": 1.4316, "step": 45 }, { "epoch": 0.11, "learning_rate": 0.0001972022914080411, "loss": 1.3448, "step": 46 }, { "epoch": 0.11, "learning_rate": 0.0001970255879787458, "loss": 1.137, "step": 47 }, { "epoch": 0.11, "learning_rate": 0.0001968435579219299, "loss": 1.1931, "step": 48 }, { "epoch": 0.11, "learning_rate": 0.0001966562112308985, "loss": 1.2504, "step": 49 }, { "epoch": 0.11, "learning_rate": 0.00019646355819083589, "loss": 1.3846, "step": 50 }, { "epoch": 0.12, "learning_rate": 0.00019626560937824046, "loss": 1.1977, "step": 51 }, { "epoch": 0.12, "learning_rate": 0.00019606237566034443, "loss": 1.2859, "step": 52 }, { "epoch": 0.12, "learning_rate": 0.00019585386819451708, "loss": 1.3802, "step": 53 }, { "epoch": 0.12, "learning_rate": 0.00019564009842765225, "loss": 1.342, "step": 54 }, { "epoch": 0.13, "learning_rate": 0.00019542107809553992, "loss": 1.2094, "step": 55 }, { "epoch": 0.13, "learning_rate": 0.00019519681922222195, "loss": 1.2858, "step": 56 }, { "epoch": 0.13, "learning_rate": 0.00019496733411933188, "loss": 1.2035, "step": 57 }, { "epoch": 0.13, "learning_rate": 0.00019473263538541914, "loss": 1.1805, "step": 58 }, { "epoch": 0.13, "learning_rate": 0.00019449273590525735, "loss": 1.2136, "step": 59 }, { "epoch": 0.14, "learning_rate": 0.0001942476488491369, "loss": 1.3127, "step": 60 }, { "epoch": 0.14, "learning_rate": 0.000193997387672142, "loss": 1.3307, "step": 61 }, { "epoch": 0.14, "learning_rate": 0.0001937419661134121, "loss": 1.3789, "step": 62 }, { "epoch": 0.14, "learning_rate": 0.0001934813981953873, "loss": 1.282, "step": 63 }, { "epoch": 0.15, "learning_rate": 0.0001932156982230388, "loss": 1.3562, "step": 64 }, { "epoch": 0.15, "learning_rate": 0.00019294488078308355, "loss": 1.2395, "step": 65 }, { "epoch": 0.15, "learning_rate": 0.00019266896074318334, "loss": 1.2479, "step": 66 }, { "epoch": 0.15, "learning_rate": 0.0001923879532511287, "loss": 1.2428, "step": 67 }, { "epoch": 0.16, "learning_rate": 0.0001921018737340071, "loss": 1.2866, "step": 68 }, { "epoch": 0.16, "learning_rate": 0.00019181073789735628, "loss": 1.2492, "step": 69 }, { "epoch": 0.16, "learning_rate": 0.00019151456172430183, "loss": 1.2166, "step": 70 }, { "epoch": 0.16, "learning_rate": 0.0001912133614746799, "loss": 1.2707, "step": 71 }, { "epoch": 0.16, "learning_rate": 0.0001909071536841442, "loss": 1.1933, "step": 72 }, { "epoch": 0.17, "learning_rate": 0.0001905959551632587, "loss": 1.1556, "step": 73 }, { "epoch": 0.17, "learning_rate": 0.00019027978299657436, "loss": 1.3107, "step": 74 }, { "epoch": 0.17, "learning_rate": 0.0001899586545416914, "loss": 1.1491, "step": 75 }, { "epoch": 0.17, "learning_rate": 0.00018963258742830626, "loss": 1.2597, "step": 76 }, { "epoch": 0.18, "learning_rate": 0.00018930159955724387, "loss": 1.1665, "step": 77 }, { "epoch": 0.18, "learning_rate": 0.00018896570909947475, "loss": 1.2775, "step": 78 }, { "epoch": 0.18, "learning_rate": 0.00018862493449511755, "loss": 1.3604, "step": 79 }, { "epoch": 0.18, "learning_rate": 0.00018827929445242676, "loss": 1.2741, "step": 80 }, { "epoch": 0.18, "learning_rate": 0.00018792880794676545, "loss": 1.2189, "step": 81 }, { "epoch": 0.19, "learning_rate": 0.0001875734942195637, "loss": 1.199, "step": 82 }, { "epoch": 0.19, "learning_rate": 0.0001872133727772622, "loss": 1.2576, "step": 83 }, { "epoch": 0.19, "learning_rate": 0.00018684846339024142, "loss": 1.2084, "step": 84 }, { "epoch": 0.19, "learning_rate": 0.0001864787860917361, "loss": 1.2359, "step": 85 }, { "epoch": 0.2, "learning_rate": 0.00018610436117673555, "loss": 1.2863, "step": 86 }, { "epoch": 0.2, "learning_rate": 0.0001857252092008695, "loss": 1.2989, "step": 87 }, { "epoch": 0.2, "learning_rate": 0.0001853413509792795, "loss": 1.4557, "step": 88 }, { "epoch": 0.2, "learning_rate": 0.00018495280758547622, "loss": 1.2275, "step": 89 }, { "epoch": 0.21, "learning_rate": 0.0001845596003501826, "loss": 1.261, "step": 90 }, { "epoch": 0.21, "learning_rate": 0.00018416175086016282, "loss": 1.2169, "step": 91 }, { "epoch": 0.21, "learning_rate": 0.00018375928095703704, "loss": 1.3301, "step": 92 }, { "epoch": 0.21, "learning_rate": 0.0001833522127360824, "loss": 1.2981, "step": 93 }, { "epoch": 0.21, "learning_rate": 0.0001829405685450202, "loss": 1.1342, "step": 94 }, { "epoch": 0.22, "learning_rate": 0.00018252437098278867, "loss": 1.1992, "step": 95 }, { "epoch": 0.22, "learning_rate": 0.0001821036428983026, "loss": 1.201, "step": 96 }, { "epoch": 0.22, "learning_rate": 0.00018167840738919882, "loss": 1.1617, "step": 97 }, { "epoch": 0.22, "learning_rate": 0.00018124868780056814, "loss": 1.2717, "step": 98 }, { "epoch": 0.23, "learning_rate": 0.00018081450772367382, "loss": 1.158, "step": 99 }, { "epoch": 0.23, "learning_rate": 0.00018037589099465637, "loss": 1.2361, "step": 100 }, { "epoch": 0.23, "learning_rate": 0.0001799328616932249, "loss": 1.1591, "step": 101 }, { "epoch": 0.23, "learning_rate": 0.00017948544414133534, "loss": 1.2643, "step": 102 }, { "epoch": 0.24, "learning_rate": 0.00017903366290185498, "loss": 1.3476, "step": 103 }, { "epoch": 0.24, "learning_rate": 0.00017857754277721415, "loss": 1.1718, "step": 104 }, { "epoch": 0.24, "learning_rate": 0.00017811710880804449, "loss": 1.3174, "step": 105 }, { "epoch": 0.24, "learning_rate": 0.00017765238627180424, "loss": 1.2998, "step": 106 }, { "epoch": 0.24, "learning_rate": 0.00017718340068139066, "loss": 1.1868, "step": 107 }, { "epoch": 0.25, "learning_rate": 0.00017671017778373913, "loss": 1.3348, "step": 108 }, { "epoch": 0.25, "learning_rate": 0.00017623274355841, "loss": 1.5578, "step": 109 }, { "epoch": 0.25, "learning_rate": 0.00017575112421616202, "loss": 1.4431, "step": 110 }, { "epoch": 0.25, "learning_rate": 0.0001752653461975136, "loss": 1.1016, "step": 111 }, { "epoch": 0.26, "learning_rate": 0.0001747754361712911, "loss": 1.2214, "step": 112 }, { "epoch": 0.26, "learning_rate": 0.0001742814210331649, "loss": 1.3365, "step": 113 }, { "epoch": 0.26, "learning_rate": 0.00017378332790417273, "loss": 1.2054, "step": 114 }, { "epoch": 0.26, "learning_rate": 0.0001732811841292307, "loss": 1.3726, "step": 115 }, { "epoch": 0.26, "learning_rate": 0.00017277501727563224, "loss": 1.1261, "step": 116 }, { "epoch": 0.27, "learning_rate": 0.00017226485513153456, "loss": 1.4755, "step": 117 }, { "epoch": 0.27, "learning_rate": 0.00017175072570443312, "loss": 1.1085, "step": 118 }, { "epoch": 0.27, "learning_rate": 0.0001712326572196241, "loss": 1.1907, "step": 119 }, { "epoch": 0.27, "learning_rate": 0.00017071067811865476, "loss": 1.177, "step": 120 }, { "epoch": 0.28, "learning_rate": 0.00017018481705776211, "loss": 1.3567, "step": 121 }, { "epoch": 0.28, "learning_rate": 0.00016965510290629972, "loss": 1.2203, "step": 122 }, { "epoch": 0.28, "learning_rate": 0.00016912156474515263, "loss": 1.2713, "step": 123 }, { "epoch": 0.28, "learning_rate": 0.00016858423186514107, "loss": 1.259, "step": 124 }, { "epoch": 0.29, "learning_rate": 0.00016804313376541226, "loss": 1.2689, "step": 125 }, { "epoch": 0.29, "learning_rate": 0.00016749830015182107, "loss": 1.2313, "step": 126 }, { "epoch": 0.29, "learning_rate": 0.00016694976093529896, "loss": 1.3463, "step": 127 }, { "epoch": 0.29, "learning_rate": 0.00016639754623021225, "loss": 1.1488, "step": 128 }, { "epoch": 0.29, "learning_rate": 0.0001658416863527084, "loss": 1.2683, "step": 129 }, { "epoch": 0.3, "learning_rate": 0.00016528221181905217, "loss": 1.3241, "step": 130 }, { "epoch": 0.3, "learning_rate": 0.0001647191533439499, "loss": 1.2318, "step": 131 }, { "epoch": 0.3, "learning_rate": 0.00016415254183886355, "loss": 1.262, "step": 132 }, { "epoch": 0.3, "learning_rate": 0.00016358240841031352, "loss": 1.0975, "step": 133 }, { "epoch": 0.31, "learning_rate": 0.00016300878435817113, "loss": 1.4011, "step": 134 }, { "epoch": 0.31, "learning_rate": 0.00016243170117394, "loss": 1.1241, "step": 135 }, { "epoch": 0.31, "learning_rate": 0.00016185119053902747, "loss": 1.3117, "step": 136 }, { "epoch": 0.31, "learning_rate": 0.00016126728432300515, "loss": 1.1575, "step": 137 }, { "epoch": 0.32, "learning_rate": 0.00016068001458185936, "loss": 1.2994, "step": 138 }, { "epoch": 0.32, "learning_rate": 0.0001600894135562312, "loss": 1.2535, "step": 139 }, { "epoch": 0.32, "learning_rate": 0.00015949551366964675, "loss": 1.1151, "step": 140 }, { "epoch": 0.32, "learning_rate": 0.00015889834752673684, "loss": 1.2376, "step": 141 }, { "epoch": 0.32, "learning_rate": 0.0001582979479114472, "loss": 1.2403, "step": 142 }, { "epoch": 0.33, "learning_rate": 0.00015769434778523868, "loss": 1.2529, "step": 143 }, { "epoch": 0.33, "learning_rate": 0.00015708758028527752, "loss": 1.1528, "step": 144 }, { "epoch": 0.33, "learning_rate": 0.00015647767872261633, "loss": 1.2316, "step": 145 }, { "epoch": 0.33, "learning_rate": 0.00015586467658036524, "loss": 1.0899, "step": 146 }, { "epoch": 0.34, "learning_rate": 0.00015524860751185376, "loss": 0.9716, "step": 147 }, { "epoch": 0.34, "learning_rate": 0.00015462950533878317, "loss": 1.1498, "step": 148 }, { "epoch": 0.34, "learning_rate": 0.00015400740404936979, "loss": 1.1439, "step": 149 }, { "epoch": 0.34, "learning_rate": 0.0001533823377964791, "loss": 1.2853, "step": 150 }, { "epoch": 0.34, "learning_rate": 0.00015275434089575055, "loss": 1.3359, "step": 151 }, { "epoch": 0.35, "learning_rate": 0.000152123447823714, "loss": 1.1899, "step": 152 }, { "epoch": 0.35, "learning_rate": 0.00015148969321589673, "loss": 1.2058, "step": 153 }, { "epoch": 0.35, "learning_rate": 0.00015085311186492206, "loss": 1.0606, "step": 154 }, { "epoch": 0.35, "learning_rate": 0.00015021373871859924, "loss": 1.1693, "step": 155 }, { "epoch": 0.36, "learning_rate": 0.00014957160887800494, "loss": 1.4608, "step": 156 }, { "epoch": 0.36, "learning_rate": 0.00014892675759555605, "loss": 1.1654, "step": 157 }, { "epoch": 0.36, "learning_rate": 0.00014827922027307451, "loss": 1.2638, "step": 158 }, { "epoch": 0.36, "learning_rate": 0.00014762903245984368, "loss": 1.377, "step": 159 }, { "epoch": 0.37, "learning_rate": 0.0001469762298506568, "loss": 1.2937, "step": 160 }, { "epoch": 0.37, "learning_rate": 0.0001463208482838573, "loss": 1.1174, "step": 161 }, { "epoch": 0.37, "learning_rate": 0.0001456629237393713, "loss": 1.1493, "step": 162 }, { "epoch": 0.37, "learning_rate": 0.00014500249233673248, "loss": 1.2626, "step": 163 }, { "epoch": 0.37, "learning_rate": 0.00014433959033309887, "loss": 1.2071, "step": 164 }, { "epoch": 0.38, "learning_rate": 0.00014367425412126265, "loss": 1.3357, "step": 165 }, { "epoch": 0.38, "learning_rate": 0.00014300652022765207, "loss": 1.1992, "step": 166 }, { "epoch": 0.38, "learning_rate": 0.00014233642531032614, "loss": 1.1616, "step": 167 }, { "epoch": 0.38, "learning_rate": 0.00014166400615696231, "loss": 1.3137, "step": 168 }, { "epoch": 0.39, "learning_rate": 0.0001409892996828366, "loss": 1.2868, "step": 169 }, { "epoch": 0.39, "learning_rate": 0.00014031234292879725, "loss": 1.2584, "step": 170 }, { "epoch": 0.39, "learning_rate": 0.00013963317305923094, "loss": 1.3188, "step": 171 }, { "epoch": 0.39, "learning_rate": 0.00013895182736002276, "loss": 1.0481, "step": 172 }, { "epoch": 0.39, "learning_rate": 0.000138268343236509, "loss": 1.312, "step": 173 }, { "epoch": 0.4, "learning_rate": 0.00013758275821142382, "loss": 1.0606, "step": 174 }, { "epoch": 0.4, "learning_rate": 0.00013689510992283922, "loss": 1.276, "step": 175 }, { "epoch": 0.4, "learning_rate": 0.00013620543612209861, "loss": 1.39, "step": 176 }, { "epoch": 0.4, "learning_rate": 0.00013551377467174456, "loss": 1.1824, "step": 177 }, { "epoch": 0.41, "learning_rate": 0.0001348201635434399, "loss": 1.1213, "step": 178 }, { "epoch": 0.41, "learning_rate": 0.00013412464081588322, "loss": 1.2384, "step": 179 }, { "epoch": 0.41, "learning_rate": 0.0001334272446727185, "loss": 1.0606, "step": 180 }, { "epoch": 0.41, "learning_rate": 0.00013272801340043867, "loss": 1.147, "step": 181 }, { "epoch": 0.42, "learning_rate": 0.00013202698538628376, "loss": 1.1966, "step": 182 }, { "epoch": 0.42, "learning_rate": 0.0001313241991161336, "loss": 1.2423, "step": 183 }, { "epoch": 0.42, "learning_rate": 0.00013061969317239468, "loss": 1.1797, "step": 184 }, { "epoch": 0.42, "learning_rate": 0.00012991350623188245, "loss": 1.4082, "step": 185 }, { "epoch": 0.42, "learning_rate": 0.00012920567706369758, "loss": 1.0627, "step": 186 }, { "epoch": 0.43, "learning_rate": 0.0001284962445270978, "loss": 1.233, "step": 187 }, { "epoch": 0.43, "learning_rate": 0.0001277852475693644, "loss": 1.1788, "step": 188 }, { "epoch": 0.43, "learning_rate": 0.00012707272522366426, "loss": 1.0949, "step": 189 }, { "epoch": 0.43, "learning_rate": 0.00012635871660690676, "loss": 1.1947, "step": 190 }, { "epoch": 0.44, "learning_rate": 0.00012564326091759646, "loss": 1.1302, "step": 191 }, { "epoch": 0.44, "learning_rate": 0.00012492639743368097, "loss": 1.2702, "step": 192 }, { "epoch": 0.44, "learning_rate": 0.0001242081655103947, "loss": 1.1206, "step": 193 }, { "epoch": 0.44, "learning_rate": 0.00012348860457809838, "loss": 1.2226, "step": 194 }, { "epoch": 0.45, "learning_rate": 0.0001227677541401142, "loss": 1.1773, "step": 195 }, { "epoch": 0.45, "learning_rate": 0.00012204565377055718, "loss": 1.1343, "step": 196 }, { "epoch": 0.45, "learning_rate": 0.0001213223431121627, "loss": 1.1041, "step": 197 }, { "epoch": 0.45, "learning_rate": 0.00012059786187410984, "loss": 1.0455, "step": 198 }, { "epoch": 0.45, "learning_rate": 0.00011987224982984176, "loss": 1.2509, "step": 199 }, { "epoch": 0.46, "learning_rate": 0.00011914554681488188, "loss": 1.143, "step": 200 }, { "epoch": 0.46, "learning_rate": 0.00011841779272464703, "loss": 1.3258, "step": 201 }, { "epoch": 0.46, "learning_rate": 0.0001176890275122573, "loss": 1.2916, "step": 202 }, { "epoch": 0.46, "learning_rate": 0.00011695929118634256, "loss": 1.1948, "step": 203 }, { "epoch": 0.47, "learning_rate": 0.00011622862380884601, "loss": 1.2079, "step": 204 }, { "epoch": 0.47, "learning_rate": 0.00011549706549282486, "loss": 1.0284, "step": 205 }, { "epoch": 0.47, "learning_rate": 0.00011476465640024814, "loss": 1.2139, "step": 206 }, { "epoch": 0.47, "learning_rate": 0.00011403143673979183, "loss": 1.1924, "step": 207 }, { "epoch": 0.47, "learning_rate": 0.00011329744676463143, "loss": 1.1459, "step": 208 }, { "epoch": 0.48, "learning_rate": 0.00011256272677023214, "loss": 0.992, "step": 209 }, { "epoch": 0.48, "learning_rate": 0.00011182731709213659, "loss": 1.1304, "step": 210 }, { "epoch": 0.48, "learning_rate": 0.00011109125810375054, "loss": 1.2196, "step": 211 }, { "epoch": 0.48, "learning_rate": 0.0001103545902141263, "loss": 1.1904, "step": 212 }, { "epoch": 0.49, "learning_rate": 0.00010961735386574448, "loss": 1.2486, "step": 213 }, { "epoch": 0.49, "learning_rate": 0.00010887958953229349, "loss": 1.2505, "step": 214 }, { "epoch": 0.49, "learning_rate": 0.00010814133771644783, "loss": 1.1688, "step": 215 }, { "epoch": 0.49, "learning_rate": 0.00010740263894764429, "loss": 1.1316, "step": 216 }, { "epoch": 0.5, "learning_rate": 0.00010666353377985711, "loss": 1.0465, "step": 217 }, { "epoch": 0.5, "learning_rate": 0.00010592406278937144, "loss": 1.1121, "step": 218 }, { "epoch": 0.5, "learning_rate": 0.00010518426657255585, "loss": 1.0605, "step": 219 }, { "epoch": 0.5, "learning_rate": 0.00010444418574363353, "loss": 1.0242, "step": 220 }, { "epoch": 0.5, "learning_rate": 0.0001037038609324527, "loss": 1.1648, "step": 221 }, { "epoch": 0.51, "learning_rate": 0.00010296333278225599, "loss": 1.2036, "step": 222 }, { "epoch": 0.51, "learning_rate": 0.00010222264194744918, "loss": 1.1253, "step": 223 }, { "epoch": 0.51, "learning_rate": 0.00010148182909136928, "loss": 1.0713, "step": 224 }, { "epoch": 0.51, "learning_rate": 0.00010074093488405223, "loss": 1.0154, "step": 225 }, { "epoch": 0.52, "learning_rate": 0.0001, "loss": 1.2678, "step": 226 }, { "epoch": 0.52, "learning_rate": 9.92590651159478e-05, "loss": 1.2334, "step": 227 }, { "epoch": 0.52, "learning_rate": 9.851817090863073e-05, "loss": 1.2362, "step": 228 }, { "epoch": 0.52, "learning_rate": 9.777735805255087e-05, "loss": 1.261, "step": 229 }, { "epoch": 0.53, "learning_rate": 9.703666721774402e-05, "loss": 1.0949, "step": 230 }, { "epoch": 0.53, "learning_rate": 9.629613906754731e-05, "loss": 1.4044, "step": 231 }, { "epoch": 0.53, "learning_rate": 9.555581425636648e-05, "loss": 1.1616, "step": 232 }, { "epoch": 0.53, "learning_rate": 9.481573342744419e-05, "loss": 1.0162, "step": 233 }, { "epoch": 0.53, "learning_rate": 9.407593721062859e-05, "loss": 1.2492, "step": 234 }, { "epoch": 0.54, "learning_rate": 9.33364662201429e-05, "loss": 1.1703, "step": 235 }, { "epoch": 0.54, "learning_rate": 9.259736105235572e-05, "loss": 1.1257, "step": 236 }, { "epoch": 0.54, "learning_rate": 9.18586622835522e-05, "loss": 1.1351, "step": 237 }, { "epoch": 0.54, "learning_rate": 9.112041046770653e-05, "loss": 1.3618, "step": 238 }, { "epoch": 0.55, "learning_rate": 9.038264613425556e-05, "loss": 1.0293, "step": 239 }, { "epoch": 0.55, "learning_rate": 8.96454097858737e-05, "loss": 1.2353, "step": 240 }, { "epoch": 0.55, "learning_rate": 8.890874189624951e-05, "loss": 1.0402, "step": 241 }, { "epoch": 0.55, "learning_rate": 8.817268290786343e-05, "loss": 1.1703, "step": 242 }, { "epoch": 0.55, "learning_rate": 8.743727322976787e-05, "loss": 1.154, "step": 243 }, { "epoch": 0.56, "learning_rate": 8.670255323536858e-05, "loss": 1.1215, "step": 244 }, { "epoch": 0.56, "learning_rate": 8.596856326020821e-05, "loss": 1.0841, "step": 245 }, { "epoch": 0.56, "learning_rate": 8.523534359975189e-05, "loss": 1.0127, "step": 246 }, { "epoch": 0.56, "learning_rate": 8.450293450717517e-05, "loss": 1.149, "step": 247 }, { "epoch": 0.57, "learning_rate": 8.3771376191154e-05, "loss": 1.1336, "step": 248 }, { "epoch": 0.57, "learning_rate": 8.304070881365747e-05, "loss": 1.1274, "step": 249 }, { "epoch": 0.57, "learning_rate": 8.231097248774274e-05, "loss": 1.0389, "step": 250 }, { "epoch": 0.57, "learning_rate": 8.158220727535299e-05, "loss": 1.1231, "step": 251 }, { "epoch": 0.58, "learning_rate": 8.085445318511813e-05, "loss": 0.9329, "step": 252 }, { "epoch": 0.58, "learning_rate": 8.012775017015828e-05, "loss": 1.1147, "step": 253 }, { "epoch": 0.58, "learning_rate": 7.940213812589018e-05, "loss": 1.0832, "step": 254 }, { "epoch": 0.58, "learning_rate": 7.867765688783733e-05, "loss": 1.1802, "step": 255 }, { "epoch": 0.58, "learning_rate": 7.795434622944281e-05, "loss": 1.1738, "step": 256 }, { "epoch": 0.59, "learning_rate": 7.723224585988584e-05, "loss": 1.0246, "step": 257 }, { "epoch": 0.59, "learning_rate": 7.651139542190164e-05, "loss": 1.2217, "step": 258 }, { "epoch": 0.59, "learning_rate": 7.579183448960531e-05, "loss": 1.0989, "step": 259 }, { "epoch": 0.59, "learning_rate": 7.507360256631904e-05, "loss": 1.1477, "step": 260 }, { "epoch": 0.6, "learning_rate": 7.435673908240357e-05, "loss": 1.1377, "step": 261 }, { "epoch": 0.6, "learning_rate": 7.364128339309326e-05, "loss": 1.0929, "step": 262 }, { "epoch": 0.6, "learning_rate": 7.292727477633575e-05, "loss": 1.0974, "step": 263 }, { "epoch": 0.6, "learning_rate": 7.221475243063563e-05, "loss": 1.2126, "step": 264 }, { "epoch": 0.61, "learning_rate": 7.150375547290225e-05, "loss": 1.0974, "step": 265 }, { "epoch": 0.61, "learning_rate": 7.079432293630244e-05, "loss": 1.1057, "step": 266 }, { "epoch": 0.61, "learning_rate": 7.008649376811756e-05, "loss": 1.1579, "step": 267 }, { "epoch": 0.61, "learning_rate": 6.938030682760532e-05, "loss": 1.1736, "step": 268 }, { "epoch": 0.61, "learning_rate": 6.867580088386646e-05, "loss": 1.0662, "step": 269 }, { "epoch": 0.62, "learning_rate": 6.797301461371625e-05, "loss": 1.2125, "step": 270 }, { "epoch": 0.62, "learning_rate": 6.727198659956133e-05, "loss": 1.1253, "step": 271 }, { "epoch": 0.62, "learning_rate": 6.65727553272815e-05, "loss": 1.0973, "step": 272 }, { "epoch": 0.62, "learning_rate": 6.58753591841168e-05, "loss": 1.2053, "step": 273 }, { "epoch": 0.63, "learning_rate": 6.517983645656014e-05, "loss": 1.1743, "step": 274 }, { "epoch": 0.63, "learning_rate": 6.448622532825545e-05, "loss": 1.2297, "step": 275 }, { "epoch": 0.63, "learning_rate": 6.379456387790138e-05, "loss": 1.1378, "step": 276 }, { "epoch": 0.63, "learning_rate": 6.310489007716083e-05, "loss": 1.2174, "step": 277 }, { "epoch": 0.63, "learning_rate": 6.24172417885762e-05, "loss": 1.2263, "step": 278 }, { "epoch": 0.64, "learning_rate": 6.173165676349103e-05, "loss": 1.148, "step": 279 }, { "epoch": 0.64, "learning_rate": 6.104817263997727e-05, "loss": 1.0725, "step": 280 }, { "epoch": 0.64, "learning_rate": 6.036682694076907e-05, "loss": 1.1362, "step": 281 }, { "epoch": 0.64, "learning_rate": 5.96876570712028e-05, "loss": 1.0239, "step": 282 }, { "epoch": 0.65, "learning_rate": 5.9010700317163404e-05, "loss": 1.102, "step": 283 }, { "epoch": 0.65, "learning_rate": 5.8335993843037695e-05, "loss": 1.0572, "step": 284 }, { "epoch": 0.65, "learning_rate": 5.7663574689673847e-05, "loss": 1.2498, "step": 285 }, { "epoch": 0.65, "learning_rate": 5.699347977234799e-05, "loss": 1.1782, "step": 286 }, { "epoch": 0.66, "learning_rate": 5.6325745878737355e-05, "loss": 1.0844, "step": 287 }, { "epoch": 0.66, "learning_rate": 5.566040966690115e-05, "loss": 1.2224, "step": 288 }, { "epoch": 0.66, "learning_rate": 5.4997507663267546e-05, "loss": 1.1441, "step": 289 }, { "epoch": 0.66, "learning_rate": 5.43370762606287e-05, "loss": 1.2026, "step": 290 }, { "epoch": 0.66, "learning_rate": 5.367915171614273e-05, "loss": 1.2394, "step": 291 }, { "epoch": 0.67, "learning_rate": 5.302377014934322e-05, "loss": 1.0419, "step": 292 }, { "epoch": 0.67, "learning_rate": 5.2370967540156346e-05, "loss": 0.9862, "step": 293 }, { "epoch": 0.67, "learning_rate": 5.172077972692553e-05, "loss": 1.0682, "step": 294 }, { "epoch": 0.67, "learning_rate": 5.1073242404443974e-05, "loss": 1.1262, "step": 295 }, { "epoch": 0.68, "learning_rate": 5.042839112199509e-05, "loss": 1.1062, "step": 296 }, { "epoch": 0.68, "learning_rate": 4.978626128140074e-05, "loss": 0.9907, "step": 297 }, { "epoch": 0.68, "learning_rate": 4.914688813507797e-05, "loss": 1.1321, "step": 298 }, { "epoch": 0.68, "learning_rate": 4.851030678410331e-05, "loss": 1.2056, "step": 299 }, { "epoch": 0.68, "learning_rate": 4.7876552176286005e-05, "loss": 1.1657, "step": 300 }, { "epoch": 0.69, "learning_rate": 4.724565910424946e-05, "loss": 1.0266, "step": 301 }, { "epoch": 0.69, "learning_rate": 4.661766220352097e-05, "loss": 0.9024, "step": 302 }, { "epoch": 0.69, "learning_rate": 4.59925959506302e-05, "loss": 1.5604, "step": 303 }, { "epoch": 0.69, "learning_rate": 4.5370494661216835e-05, "loss": 1.0028, "step": 304 }, { "epoch": 0.7, "learning_rate": 4.475139248814625e-05, "loss": 1.0763, "step": 305 }, { "epoch": 0.7, "learning_rate": 4.4135323419634766e-05, "loss": 1.0086, "step": 306 }, { "epoch": 0.7, "learning_rate": 4.352232127738368e-05, "loss": 1.0214, "step": 307 }, { "epoch": 0.7, "learning_rate": 4.2912419714722496e-05, "loss": 0.9547, "step": 308 }, { "epoch": 0.71, "learning_rate": 4.230565221476131e-05, "loss": 1.0584, "step": 309 }, { "epoch": 0.71, "learning_rate": 4.170205208855281e-05, "loss": 1.2047, "step": 310 }, { "epoch": 0.71, "learning_rate": 4.1101652473263195e-05, "loss": 1.0804, "step": 311 }, { "epoch": 0.71, "learning_rate": 4.0504486330353264e-05, "loss": 1.0394, "step": 312 }, { "epoch": 0.71, "learning_rate": 3.9910586443768806e-05, "loss": 1.1128, "step": 313 }, { "epoch": 0.72, "learning_rate": 3.931998541814069e-05, "loss": 1.2576, "step": 314 }, { "epoch": 0.72, "learning_rate": 3.873271567699485e-05, "loss": 1.0032, "step": 315 }, { "epoch": 0.72, "learning_rate": 3.814880946097252e-05, "loss": 1.0491, "step": 316 }, { "epoch": 0.72, "learning_rate": 3.756829882606001e-05, "loss": 1.0629, "step": 317 }, { "epoch": 0.73, "learning_rate": 3.69912156418289e-05, "loss": 1.0391, "step": 318 }, { "epoch": 0.73, "learning_rate": 3.641759158968649e-05, "loss": 1.0735, "step": 319 }, { "epoch": 0.73, "learning_rate": 3.584745816113648e-05, "loss": 1.0636, "step": 320 }, { "epoch": 0.73, "learning_rate": 3.528084665605013e-05, "loss": 1.0232, "step": 321 }, { "epoch": 0.74, "learning_rate": 3.471778818094785e-05, "loss": 1.2711, "step": 322 }, { "epoch": 0.74, "learning_rate": 3.4158313647291604e-05, "loss": 0.9995, "step": 323 }, { "epoch": 0.74, "learning_rate": 3.360245376978779e-05, "loss": 1.1935, "step": 324 }, { "epoch": 0.74, "learning_rate": 3.3050239064701016e-05, "loss": 1.2296, "step": 325 }, { "epoch": 0.74, "learning_rate": 3.250169984817897e-05, "loss": 1.1118, "step": 326 }, { "epoch": 0.75, "learning_rate": 3.1956866234587766e-05, "loss": 1.0869, "step": 327 }, { "epoch": 0.75, "learning_rate": 3.1415768134858945e-05, "loss": 1.1736, "step": 328 }, { "epoch": 0.75, "learning_rate": 3.0878435254847394e-05, "loss": 0.9783, "step": 329 }, { "epoch": 0.75, "learning_rate": 3.034489709370033e-05, "loss": 1.0698, "step": 330 }, { "epoch": 0.76, "learning_rate": 2.9815182942237885e-05, "loss": 1.0023, "step": 331 }, { "epoch": 0.76, "learning_rate": 2.9289321881345254e-05, "loss": 0.9884, "step": 332 }, { "epoch": 0.76, "learning_rate": 2.8767342780375926e-05, "loss": 1.1866, "step": 333 }, { "epoch": 0.76, "learning_rate": 2.8249274295566864e-05, "loss": 1.1612, "step": 334 }, { "epoch": 0.76, "learning_rate": 2.7735144868465458e-05, "loss": 1.0186, "step": 335 }, { "epoch": 0.77, "learning_rate": 2.7224982724367776e-05, "loss": 1.1382, "step": 336 }, { "epoch": 0.77, "learning_rate": 2.6718815870769287e-05, "loss": 1.0752, "step": 337 }, { "epoch": 0.77, "learning_rate": 2.6216672095827266e-05, "loss": 1.0572, "step": 338 }, { "epoch": 0.77, "learning_rate": 2.5718578966835117e-05, "loss": 1.1551, "step": 339 }, { "epoch": 0.78, "learning_rate": 2.5224563828708902e-05, "loss": 1.1094, "step": 340 }, { "epoch": 0.78, "learning_rate": 2.4734653802486428e-05, "loss": 1.0506, "step": 341 }, { "epoch": 0.78, "learning_rate": 2.4248875783837987e-05, "loss": 1.185, "step": 342 }, { "epoch": 0.78, "learning_rate": 2.3767256441590014e-05, "loss": 0.823, "step": 343 }, { "epoch": 0.79, "learning_rate": 2.328982221626087e-05, "loss": 1.0136, "step": 344 }, { "epoch": 0.79, "learning_rate": 2.2816599318609368e-05, "loss": 1.13, "step": 345 }, { "epoch": 0.79, "learning_rate": 2.234761372819577e-05, "loss": 1.0324, "step": 346 }, { "epoch": 0.79, "learning_rate": 2.1882891191955534e-05, "loss": 1.0539, "step": 347 }, { "epoch": 0.79, "learning_rate": 2.1422457222785873e-05, "loss": 1.1255, "step": 348 }, { "epoch": 0.8, "learning_rate": 2.0966337098145037e-05, "loss": 1.0934, "step": 349 }, { "epoch": 0.8, "learning_rate": 2.0514555858664663e-05, "loss": 1.2765, "step": 350 }, { "epoch": 0.8, "learning_rate": 2.0067138306775124e-05, "loss": 0.9743, "step": 351 }, { "epoch": 0.8, "learning_rate": 1.9624109005343672e-05, "loss": 1.1101, "step": 352 }, { "epoch": 0.81, "learning_rate": 1.918549227632619e-05, "loss": 1.2542, "step": 353 }, { "epoch": 0.81, "learning_rate": 1.875131219943187e-05, "loss": 1.2118, "step": 354 }, { "epoch": 0.81, "learning_rate": 1.832159261080122e-05, "loss": 0.925, "step": 355 }, { "epoch": 0.81, "learning_rate": 1.7896357101697404e-05, "loss": 1.0461, "step": 356 }, { "epoch": 0.82, "learning_rate": 1.747562901721135e-05, "loss": 0.914, "step": 357 }, { "epoch": 0.82, "learning_rate": 1.7059431454979824e-05, "loss": 1.0492, "step": 358 }, { "epoch": 0.82, "learning_rate": 1.6647787263917612e-05, "loss": 1.2384, "step": 359 }, { "epoch": 0.82, "learning_rate": 1.6240719042963002e-05, "loss": 1.2198, "step": 360 }, { "epoch": 0.82, "learning_rate": 1.5838249139837202e-05, "loss": 1.1803, "step": 361 }, { "epoch": 0.83, "learning_rate": 1.5440399649817385e-05, "loss": 1.0166, "step": 362 }, { "epoch": 0.83, "learning_rate": 1.5047192414523815e-05, "loss": 0.9622, "step": 363 }, { "epoch": 0.83, "learning_rate": 1.4658649020720538e-05, "loss": 1.04, "step": 364 }, { "epoch": 0.83, "learning_rate": 1.42747907991305e-05, "loss": 1.0195, "step": 365 }, { "epoch": 0.84, "learning_rate": 1.3895638823264446e-05, "loss": 1.1031, "step": 366 }, { "epoch": 0.84, "learning_rate": 1.352121390826393e-05, "loss": 1.1878, "step": 367 }, { "epoch": 0.84, "learning_rate": 1.3151536609758586e-05, "loss": 1.3069, "step": 368 }, { "epoch": 0.84, "learning_rate": 1.27866272227378e-05, "loss": 1.2004, "step": 369 }, { "epoch": 0.84, "learning_rate": 1.2426505780436326e-05, "loss": 1.1324, "step": 370 }, { "epoch": 0.85, "learning_rate": 1.2071192053234581e-05, "loss": 1.3239, "step": 371 }, { "epoch": 0.85, "learning_rate": 1.1720705547573263e-05, "loss": 1.0881, "step": 372 }, { "epoch": 0.85, "learning_rate": 1.1375065504882465e-05, "loss": 1.0445, "step": 373 }, { "epoch": 0.85, "learning_rate": 1.103429090052528e-05, "loss": 1.0811, "step": 374 }, { "epoch": 0.86, "learning_rate": 1.0698400442756152e-05, "loss": 1.0309, "step": 375 }, { "epoch": 0.86, "learning_rate": 1.0367412571693747e-05, "loss": 1.1477, "step": 376 }, { "epoch": 0.86, "learning_rate": 1.0041345458308616e-05, "loss": 1.0614, "step": 377 }, { "epoch": 0.86, "learning_rate": 9.720217003425647e-06, "loss": 1.2404, "step": 378 }, { "epoch": 0.87, "learning_rate": 9.404044836741343e-06, "loss": 1.1101, "step": 379 }, { "epoch": 0.87, "learning_rate": 9.092846315855841e-06, "loss": 1.1387, "step": 380 }, { "epoch": 0.87, "learning_rate": 8.786638525320146e-06, "loss": 1.1928, "step": 381 }, { "epoch": 0.87, "learning_rate": 8.485438275698154e-06, "loss": 1.1997, "step": 382 }, { "epoch": 0.87, "learning_rate": 8.189262102643746e-06, "loss": 1.0821, "step": 383 }, { "epoch": 0.88, "learning_rate": 7.89812626599291e-06, "loss": 1.0979, "step": 384 }, { "epoch": 0.88, "learning_rate": 7.612046748871327e-06, "loss": 1.1719, "step": 385 }, { "epoch": 0.88, "learning_rate": 7.331039256816663e-06, "loss": 1.1024, "step": 386 }, { "epoch": 0.88, "learning_rate": 7.0551192169164505e-06, "loss": 1.1508, "step": 387 }, { "epoch": 0.89, "learning_rate": 6.7843017769612215e-06, "loss": 1.0387, "step": 388 }, { "epoch": 0.89, "learning_rate": 6.518601804612734e-06, "loss": 1.2251, "step": 389 }, { "epoch": 0.89, "learning_rate": 6.258033886587911e-06, "loss": 1.241, "step": 390 }, { "epoch": 0.89, "learning_rate": 6.00261232785797e-06, "loss": 1.2481, "step": 391 }, { "epoch": 0.89, "learning_rate": 5.7523511508631336e-06, "loss": 1.2674, "step": 392 }, { "epoch": 0.9, "learning_rate": 5.50726409474267e-06, "loss": 1.0774, "step": 393 }, { "epoch": 0.9, "learning_rate": 5.267364614580861e-06, "loss": 1.2461, "step": 394 }, { "epoch": 0.9, "learning_rate": 5.032665880668119e-06, "loss": 1.0963, "step": 395 }, { "epoch": 0.9, "learning_rate": 4.803180777778049e-06, "loss": 1.252, "step": 396 }, { "epoch": 0.91, "learning_rate": 4.578921904460076e-06, "loss": 1.0251, "step": 397 }, { "epoch": 0.91, "learning_rate": 4.359901572347758e-06, "loss": 0.9214, "step": 398 }, { "epoch": 0.91, "learning_rate": 4.146131805482944e-06, "loss": 1.1498, "step": 399 }, { "epoch": 0.91, "learning_rate": 3.937624339655599e-06, "loss": 0.8854, "step": 400 }, { "epoch": 0.92, "learning_rate": 3.734390621759565e-06, "loss": 1.221, "step": 401 }, { "epoch": 0.92, "learning_rate": 3.5364418091641373e-06, "loss": 1.0589, "step": 402 }, { "epoch": 0.92, "learning_rate": 3.343788769101486e-06, "loss": 1.1017, "step": 403 }, { "epoch": 0.92, "learning_rate": 3.156442078070143e-06, "loss": 1.0694, "step": 404 }, { "epoch": 0.92, "learning_rate": 2.974412021254236e-06, "loss": 1.2257, "step": 405 }, { "epoch": 0.93, "learning_rate": 2.7977085919589254e-06, "loss": 1.1377, "step": 406 }, { "epoch": 0.93, "learning_rate": 2.6263414910618012e-06, "loss": 1.0816, "step": 407 }, { "epoch": 0.93, "learning_rate": 2.460320126480242e-06, "loss": 0.9841, "step": 408 }, { "epoch": 0.93, "learning_rate": 2.2996536126549395e-06, "loss": 1.2322, "step": 409 }, { "epoch": 0.94, "learning_rate": 2.144350770049597e-06, "loss": 0.9069, "step": 410 }, { "epoch": 0.94, "learning_rate": 1.9944201246666074e-06, "loss": 1.0551, "step": 411 }, { "epoch": 0.94, "learning_rate": 1.8498699075789939e-06, "loss": 1.0453, "step": 412 }, { "epoch": 0.94, "learning_rate": 1.7107080544785937e-06, "loss": 0.9641, "step": 413 }, { "epoch": 0.95, "learning_rate": 1.576942205240317e-06, "loss": 1.1408, "step": 414 }, { "epoch": 0.95, "learning_rate": 1.4485797035027727e-06, "loss": 1.2242, "step": 415 }, { "epoch": 0.95, "learning_rate": 1.3256275962651222e-06, "loss": 1.0917, "step": 416 }, { "epoch": 0.95, "learning_rate": 1.208092633500102e-06, "loss": 1.0118, "step": 417 }, { "epoch": 0.95, "learning_rate": 1.0959812677835968e-06, "loss": 1.0072, "step": 418 }, { "epoch": 0.96, "learning_rate": 9.892996539403232e-07, "loss": 1.0862, "step": 419 }, { "epoch": 0.96, "learning_rate": 8.880536487059333e-07, "loss": 1.0861, "step": 420 }, { "epoch": 0.96, "learning_rate": 7.922488104054825e-07, "loss": 1.1889, "step": 421 }, { "epoch": 0.96, "learning_rate": 7.018903986483083e-07, "loss": 0.9407, "step": 422 }, { "epoch": 0.97, "learning_rate": 6.169833740392595e-07, "loss": 1.0854, "step": 423 }, { "epoch": 0.97, "learning_rate": 5.375323979063928e-07, "loss": 1.0511, "step": 424 }, { "epoch": 0.97, "learning_rate": 4.635418320450335e-07, "loss": 0.9293, "step": 425 }, { "epoch": 0.97, "learning_rate": 3.950157384783104e-07, "loss": 0.9677, "step": 426 }, { "epoch": 0.97, "learning_rate": 3.319578792342126e-07, "loss": 1.1209, "step": 427 }, { "epoch": 0.98, "learning_rate": 2.7437171613898807e-07, "loss": 1.1188, "step": 428 }, { "epoch": 0.98, "learning_rate": 2.2226041062715086e-07, "loss": 1.2072, "step": 429 }, { "epoch": 0.98, "learning_rate": 1.7562682356786487e-07, "loss": 1.0988, "step": 430 }, { "epoch": 0.98, "learning_rate": 1.344735151079246e-07, "loss": 1.1216, "step": 431 }, { "epoch": 0.99, "learning_rate": 9.88027445312123e-08, "loss": 1.1095, "step": 432 }, { "epoch": 0.99, "learning_rate": 6.861647013461925e-08, "loss": 1.1363, "step": 433 }, { "epoch": 0.99, "learning_rate": 4.391634912056519e-08, "loss": 1.0079, "step": 434 }, { "epoch": 0.99, "learning_rate": 2.4703737506037715e-08, "loss": 1.0498, "step": 435 }, { "epoch": 1.0, "learning_rate": 1.0979690048107394e-08, "loss": 1.0434, "step": 436 }, { "epoch": 1.0, "learning_rate": 2.7449601860629613e-09, "loss": 1.0162, "step": 437 }, { "epoch": 1.0, "learning_rate": 0.0, "loss": 1.0504, "step": 438 }, { "epoch": 1.0, "step": 438, "total_flos": 8.085704333380813e+16, "train_loss": 1.1821429170157811, "train_runtime": 1894.5999, "train_samples_per_second": 3.695, "train_steps_per_second": 0.231 } ], "logging_steps": 1.0, "max_steps": 438, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50000, "total_flos": 8.085704333380813e+16, "train_batch_size": 16, "trial_name": null, "trial_params": null }