{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9997658262430723, "eval_steps": 500, "global_step": 3202, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00031223167590352044, "grad_norm": 5.92302942276001, "learning_rate": 5e-05, "loss": 0.4328, "step": 1 }, { "epoch": 0.0006244633518070409, "grad_norm": 2.122042417526245, "learning_rate": 5e-05, "loss": 0.344, "step": 2 }, { "epoch": 0.0009366950277105613, "grad_norm": 1.5847505331039429, "learning_rate": 5e-05, "loss": 0.3021, "step": 3 }, { "epoch": 0.0012489267036140817, "grad_norm": 1.4155744314193726, "learning_rate": 5e-05, "loss": 0.2829, "step": 4 }, { "epoch": 0.001561158379517602, "grad_norm": 1.2378954887390137, "learning_rate": 5e-05, "loss": 0.252, "step": 5 }, { "epoch": 0.0018733900554211225, "grad_norm": 1.2338117361068726, "learning_rate": 5e-05, "loss": 0.2461, "step": 6 }, { "epoch": 0.0021856217313246428, "grad_norm": 1.0862535238265991, "learning_rate": 5e-05, "loss": 0.2451, "step": 7 }, { "epoch": 0.0024978534072281635, "grad_norm": 1.0666377544403076, "learning_rate": 5e-05, "loss": 0.2583, "step": 8 }, { "epoch": 0.0028100850831316838, "grad_norm": 1.0748363733291626, "learning_rate": 5e-05, "loss": 0.2425, "step": 9 }, { "epoch": 0.003122316759035204, "grad_norm": 1.1457756757736206, "learning_rate": 5e-05, "loss": 0.2539, "step": 10 }, { "epoch": 0.0034345484349387247, "grad_norm": 1.0843021869659424, "learning_rate": 5e-05, "loss": 0.2591, "step": 11 }, { "epoch": 0.003746780110842245, "grad_norm": 0.9943569302558899, "learning_rate": 5e-05, "loss": 0.2575, "step": 12 }, { "epoch": 0.004059011786745765, "grad_norm": 0.9921255111694336, "learning_rate": 5e-05, "loss": 0.2673, "step": 13 }, { "epoch": 0.0043712434626492856, "grad_norm": 1.023245930671692, "learning_rate": 5e-05, "loss": 0.2509, "step": 14 }, { "epoch": 0.004683475138552806, "grad_norm": 1.087518572807312, "learning_rate": 5e-05, "loss": 0.2546, "step": 15 }, { "epoch": 0.004995706814456327, "grad_norm": 1.0689735412597656, "learning_rate": 5e-05, "loss": 0.2805, "step": 16 }, { "epoch": 0.005307938490359847, "grad_norm": 1.0388258695602417, "learning_rate": 5e-05, "loss": 0.2673, "step": 17 }, { "epoch": 0.0056201701662633675, "grad_norm": 1.056992530822754, "learning_rate": 5e-05, "loss": 0.2616, "step": 18 }, { "epoch": 0.005932401842166888, "grad_norm": 0.9949621558189392, "learning_rate": 5e-05, "loss": 0.2325, "step": 19 }, { "epoch": 0.006244633518070408, "grad_norm": 0.9790173172950745, "learning_rate": 5e-05, "loss": 0.2415, "step": 20 }, { "epoch": 0.006556865193973928, "grad_norm": 1.962924838066101, "learning_rate": 5e-05, "loss": 0.2321, "step": 21 }, { "epoch": 0.0068690968698774495, "grad_norm": 1.020336389541626, "learning_rate": 5e-05, "loss": 0.255, "step": 22 }, { "epoch": 0.00718132854578097, "grad_norm": 0.9996064901351929, "learning_rate": 5e-05, "loss": 0.2433, "step": 23 }, { "epoch": 0.00749356022168449, "grad_norm": 1.0183420181274414, "learning_rate": 5e-05, "loss": 0.2623, "step": 24 }, { "epoch": 0.00780579189758801, "grad_norm": 0.9324208498001099, "learning_rate": 5e-05, "loss": 0.2618, "step": 25 }, { "epoch": 0.00811802357349153, "grad_norm": 0.962169349193573, "learning_rate": 5e-05, "loss": 0.2482, "step": 26 }, { "epoch": 0.00843025524939505, "grad_norm": 0.9282968044281006, "learning_rate": 5e-05, "loss": 0.2569, "step": 27 }, { "epoch": 0.008742486925298571, "grad_norm": 0.9337531328201294, "learning_rate": 5e-05, "loss": 0.2656, "step": 28 }, { "epoch": 0.009054718601202091, "grad_norm": 1.0030183792114258, "learning_rate": 5e-05, "loss": 0.2562, "step": 29 }, { "epoch": 0.009366950277105612, "grad_norm": 0.9647758603096008, "learning_rate": 5e-05, "loss": 0.2537, "step": 30 }, { "epoch": 0.009679181953009132, "grad_norm": 0.8999332189559937, "learning_rate": 5e-05, "loss": 0.2292, "step": 31 }, { "epoch": 0.009991413628912654, "grad_norm": 0.9450830221176147, "learning_rate": 5e-05, "loss": 0.231, "step": 32 }, { "epoch": 0.010303645304816174, "grad_norm": 1.0082815885543823, "learning_rate": 5e-05, "loss": 0.2496, "step": 33 }, { "epoch": 0.010615876980719694, "grad_norm": 0.9963700175285339, "learning_rate": 5e-05, "loss": 0.2618, "step": 34 }, { "epoch": 0.010928108656623215, "grad_norm": 0.9345080852508545, "learning_rate": 5e-05, "loss": 0.2451, "step": 35 }, { "epoch": 0.011240340332526735, "grad_norm": 0.890504002571106, "learning_rate": 5e-05, "loss": 0.2353, "step": 36 }, { "epoch": 0.011552572008430255, "grad_norm": 0.878871738910675, "learning_rate": 5e-05, "loss": 0.2431, "step": 37 }, { "epoch": 0.011864803684333776, "grad_norm": 0.866408109664917, "learning_rate": 5e-05, "loss": 0.2385, "step": 38 }, { "epoch": 0.012177035360237296, "grad_norm": 0.8790531158447266, "learning_rate": 5e-05, "loss": 0.2423, "step": 39 }, { "epoch": 0.012489267036140816, "grad_norm": 0.9295474290847778, "learning_rate": 5e-05, "loss": 0.2375, "step": 40 }, { "epoch": 0.012801498712044336, "grad_norm": 0.8549859523773193, "learning_rate": 5e-05, "loss": 0.2486, "step": 41 }, { "epoch": 0.013113730387947857, "grad_norm": 0.9811318516731262, "learning_rate": 5e-05, "loss": 0.2755, "step": 42 }, { "epoch": 0.013425962063851377, "grad_norm": 0.9415158629417419, "learning_rate": 5e-05, "loss": 0.2541, "step": 43 }, { "epoch": 0.013738193739754899, "grad_norm": 0.8856943845748901, "learning_rate": 5e-05, "loss": 0.2249, "step": 44 }, { "epoch": 0.01405042541565842, "grad_norm": 0.9468849301338196, "learning_rate": 5e-05, "loss": 0.2395, "step": 45 }, { "epoch": 0.01436265709156194, "grad_norm": 0.9523277878761292, "learning_rate": 5e-05, "loss": 0.2535, "step": 46 }, { "epoch": 0.01467488876746546, "grad_norm": 0.8940654397010803, "learning_rate": 5e-05, "loss": 0.2287, "step": 47 }, { "epoch": 0.01498712044336898, "grad_norm": 0.8535509705543518, "learning_rate": 5e-05, "loss": 0.2332, "step": 48 }, { "epoch": 0.0152993521192725, "grad_norm": 0.884842038154602, "learning_rate": 5e-05, "loss": 0.2478, "step": 49 }, { "epoch": 0.01561158379517602, "grad_norm": 0.8986355066299438, "learning_rate": 5e-05, "loss": 0.2484, "step": 50 }, { "epoch": 0.01592381547107954, "grad_norm": 0.8694790005683899, "learning_rate": 5e-05, "loss": 0.2443, "step": 51 }, { "epoch": 0.01623604714698306, "grad_norm": 0.8983823657035828, "learning_rate": 5e-05, "loss": 0.2469, "step": 52 }, { "epoch": 0.01654827882288658, "grad_norm": 0.8519140481948853, "learning_rate": 5e-05, "loss": 0.2367, "step": 53 }, { "epoch": 0.0168605104987901, "grad_norm": 0.8456822633743286, "learning_rate": 5e-05, "loss": 0.2245, "step": 54 }, { "epoch": 0.017172742174693622, "grad_norm": 0.9412555694580078, "learning_rate": 5e-05, "loss": 0.2538, "step": 55 }, { "epoch": 0.017484973850597142, "grad_norm": 0.9004572033882141, "learning_rate": 5e-05, "loss": 0.2586, "step": 56 }, { "epoch": 0.017797205526500662, "grad_norm": 0.8651750683784485, "learning_rate": 5e-05, "loss": 0.2341, "step": 57 }, { "epoch": 0.018109437202404183, "grad_norm": 0.9427449107170105, "learning_rate": 5e-05, "loss": 0.2577, "step": 58 }, { "epoch": 0.018421668878307703, "grad_norm": 0.8608784079551697, "learning_rate": 5e-05, "loss": 0.2429, "step": 59 }, { "epoch": 0.018733900554211223, "grad_norm": 0.8825051188468933, "learning_rate": 5e-05, "loss": 0.244, "step": 60 }, { "epoch": 0.019046132230114744, "grad_norm": 0.8609340786933899, "learning_rate": 5e-05, "loss": 0.2443, "step": 61 }, { "epoch": 0.019358363906018264, "grad_norm": 0.9195008873939514, "learning_rate": 5e-05, "loss": 0.259, "step": 62 }, { "epoch": 0.019670595581921788, "grad_norm": 0.8513927459716797, "learning_rate": 5e-05, "loss": 0.2518, "step": 63 }, { "epoch": 0.019982827257825308, "grad_norm": 0.8520167469978333, "learning_rate": 5e-05, "loss": 0.2399, "step": 64 }, { "epoch": 0.020295058933728828, "grad_norm": 0.8739352226257324, "learning_rate": 5e-05, "loss": 0.2549, "step": 65 }, { "epoch": 0.02060729060963235, "grad_norm": 0.8346822261810303, "learning_rate": 5e-05, "loss": 0.2417, "step": 66 }, { "epoch": 0.02091952228553587, "grad_norm": 0.8409432768821716, "learning_rate": 5e-05, "loss": 0.2418, "step": 67 }, { "epoch": 0.02123175396143939, "grad_norm": 0.8686328530311584, "learning_rate": 5e-05, "loss": 0.2495, "step": 68 }, { "epoch": 0.02154398563734291, "grad_norm": 0.9030161499977112, "learning_rate": 5e-05, "loss": 0.2583, "step": 69 }, { "epoch": 0.02185621731324643, "grad_norm": 0.85794997215271, "learning_rate": 5e-05, "loss": 0.253, "step": 70 }, { "epoch": 0.02216844898914995, "grad_norm": 0.9217143654823303, "learning_rate": 5e-05, "loss": 0.2465, "step": 71 }, { "epoch": 0.02248068066505347, "grad_norm": 0.8939414024353027, "learning_rate": 5e-05, "loss": 0.2446, "step": 72 }, { "epoch": 0.02279291234095699, "grad_norm": 0.8693103194236755, "learning_rate": 5e-05, "loss": 0.2631, "step": 73 }, { "epoch": 0.02310514401686051, "grad_norm": 0.8887609243392944, "learning_rate": 5e-05, "loss": 0.2531, "step": 74 }, { "epoch": 0.02341737569276403, "grad_norm": 0.8733563423156738, "learning_rate": 5e-05, "loss": 0.253, "step": 75 }, { "epoch": 0.02372960736866755, "grad_norm": 0.8297935724258423, "learning_rate": 5e-05, "loss": 0.2441, "step": 76 }, { "epoch": 0.02404183904457107, "grad_norm": 0.8870866894721985, "learning_rate": 5e-05, "loss": 0.2545, "step": 77 }, { "epoch": 0.02435407072047459, "grad_norm": 0.8497397303581238, "learning_rate": 5e-05, "loss": 0.2586, "step": 78 }, { "epoch": 0.024666302396378112, "grad_norm": 0.8706568479537964, "learning_rate": 5e-05, "loss": 0.251, "step": 79 }, { "epoch": 0.024978534072281632, "grad_norm": 0.7608910202980042, "learning_rate": 5e-05, "loss": 0.2171, "step": 80 }, { "epoch": 0.025290765748185152, "grad_norm": 0.8609456419944763, "learning_rate": 5e-05, "loss": 0.2498, "step": 81 }, { "epoch": 0.025602997424088673, "grad_norm": 0.8762296438217163, "learning_rate": 5e-05, "loss": 0.2244, "step": 82 }, { "epoch": 0.025915229099992193, "grad_norm": 0.8355544209480286, "learning_rate": 5e-05, "loss": 0.2475, "step": 83 }, { "epoch": 0.026227460775895713, "grad_norm": 0.9140622615814209, "learning_rate": 5e-05, "loss": 0.2524, "step": 84 }, { "epoch": 0.026539692451799234, "grad_norm": 0.8379641175270081, "learning_rate": 5e-05, "loss": 0.2416, "step": 85 }, { "epoch": 0.026851924127702754, "grad_norm": 0.9126095175743103, "learning_rate": 5e-05, "loss": 0.2471, "step": 86 }, { "epoch": 0.027164155803606274, "grad_norm": 0.8257933259010315, "learning_rate": 5e-05, "loss": 0.2393, "step": 87 }, { "epoch": 0.027476387479509798, "grad_norm": 0.8690134882926941, "learning_rate": 5e-05, "loss": 0.2434, "step": 88 }, { "epoch": 0.027788619155413318, "grad_norm": 0.8215010166168213, "learning_rate": 5e-05, "loss": 0.223, "step": 89 }, { "epoch": 0.02810085083131684, "grad_norm": 0.8241698741912842, "learning_rate": 5e-05, "loss": 0.2315, "step": 90 }, { "epoch": 0.02841308250722036, "grad_norm": 0.7947086095809937, "learning_rate": 5e-05, "loss": 0.2337, "step": 91 }, { "epoch": 0.02872531418312388, "grad_norm": 0.8368167281150818, "learning_rate": 5e-05, "loss": 0.2483, "step": 92 }, { "epoch": 0.0290375458590274, "grad_norm": 0.8646368384361267, "learning_rate": 5e-05, "loss": 0.2426, "step": 93 }, { "epoch": 0.02934977753493092, "grad_norm": 0.88730788230896, "learning_rate": 5e-05, "loss": 0.2673, "step": 94 }, { "epoch": 0.02966200921083444, "grad_norm": 0.8408634662628174, "learning_rate": 5e-05, "loss": 0.2457, "step": 95 }, { "epoch": 0.02997424088673796, "grad_norm": 0.8297551274299622, "learning_rate": 5e-05, "loss": 0.2489, "step": 96 }, { "epoch": 0.03028647256264148, "grad_norm": 0.8500155210494995, "learning_rate": 5e-05, "loss": 0.2311, "step": 97 }, { "epoch": 0.030598704238545, "grad_norm": 0.8076023459434509, "learning_rate": 5e-05, "loss": 0.233, "step": 98 }, { "epoch": 0.03091093591444852, "grad_norm": 0.8306854963302612, "learning_rate": 5e-05, "loss": 0.2284, "step": 99 }, { "epoch": 0.03122316759035204, "grad_norm": 0.822745144367218, "learning_rate": 5e-05, "loss": 0.2341, "step": 100 }, { "epoch": 0.031535399266255565, "grad_norm": 0.8455970883369446, "learning_rate": 5e-05, "loss": 0.2571, "step": 101 }, { "epoch": 0.03184763094215908, "grad_norm": 0.8684026002883911, "learning_rate": 5e-05, "loss": 0.2531, "step": 102 }, { "epoch": 0.032159862618062605, "grad_norm": 0.8494640588760376, "learning_rate": 5e-05, "loss": 0.2408, "step": 103 }, { "epoch": 0.03247209429396612, "grad_norm": 0.8604395389556885, "learning_rate": 5e-05, "loss": 0.2558, "step": 104 }, { "epoch": 0.032784325969869646, "grad_norm": 0.8439518809318542, "learning_rate": 5e-05, "loss": 0.2608, "step": 105 }, { "epoch": 0.03309655764577316, "grad_norm": 0.8729404807090759, "learning_rate": 5e-05, "loss": 0.2571, "step": 106 }, { "epoch": 0.033408789321676687, "grad_norm": 0.8557404279708862, "learning_rate": 5e-05, "loss": 0.248, "step": 107 }, { "epoch": 0.0337210209975802, "grad_norm": 0.8192147016525269, "learning_rate": 5e-05, "loss": 0.235, "step": 108 }, { "epoch": 0.03403325267348373, "grad_norm": 0.7668401002883911, "learning_rate": 5e-05, "loss": 0.235, "step": 109 }, { "epoch": 0.034345484349387244, "grad_norm": 0.8256463408470154, "learning_rate": 5e-05, "loss": 0.2563, "step": 110 }, { "epoch": 0.03465771602529077, "grad_norm": 0.8395894765853882, "learning_rate": 5e-05, "loss": 0.2568, "step": 111 }, { "epoch": 0.034969947701194284, "grad_norm": 0.850821316242218, "learning_rate": 5e-05, "loss": 0.2606, "step": 112 }, { "epoch": 0.03528217937709781, "grad_norm": 0.7850267887115479, "learning_rate": 5e-05, "loss": 0.2175, "step": 113 }, { "epoch": 0.035594411053001325, "grad_norm": 0.8105971813201904, "learning_rate": 5e-05, "loss": 0.2366, "step": 114 }, { "epoch": 0.03590664272890485, "grad_norm": 0.817332923412323, "learning_rate": 5e-05, "loss": 0.232, "step": 115 }, { "epoch": 0.036218874404808366, "grad_norm": 0.8478107452392578, "learning_rate": 5e-05, "loss": 0.2623, "step": 116 }, { "epoch": 0.03653110608071189, "grad_norm": 0.8151906728744507, "learning_rate": 5e-05, "loss": 0.2331, "step": 117 }, { "epoch": 0.036843337756615406, "grad_norm": 0.7804871201515198, "learning_rate": 5e-05, "loss": 0.2349, "step": 118 }, { "epoch": 0.03715556943251893, "grad_norm": 0.8148698806762695, "learning_rate": 5e-05, "loss": 0.2188, "step": 119 }, { "epoch": 0.03746780110842245, "grad_norm": 0.7643448114395142, "learning_rate": 5e-05, "loss": 0.2172, "step": 120 }, { "epoch": 0.03778003278432597, "grad_norm": 0.8426647782325745, "learning_rate": 5e-05, "loss": 0.2385, "step": 121 }, { "epoch": 0.03809226446022949, "grad_norm": 0.8287621140480042, "learning_rate": 5e-05, "loss": 0.2264, "step": 122 }, { "epoch": 0.03840449613613301, "grad_norm": 0.8123031258583069, "learning_rate": 5e-05, "loss": 0.2274, "step": 123 }, { "epoch": 0.03871672781203653, "grad_norm": 0.8115465044975281, "learning_rate": 5e-05, "loss": 0.238, "step": 124 }, { "epoch": 0.03902895948794005, "grad_norm": 0.8803959488868713, "learning_rate": 5e-05, "loss": 0.2606, "step": 125 }, { "epoch": 0.039341191163843575, "grad_norm": 0.814569354057312, "learning_rate": 5e-05, "loss": 0.232, "step": 126 }, { "epoch": 0.03965342283974709, "grad_norm": 0.8246992230415344, "learning_rate": 5e-05, "loss": 0.2478, "step": 127 }, { "epoch": 0.039965654515650616, "grad_norm": 0.784478485584259, "learning_rate": 5e-05, "loss": 0.2394, "step": 128 }, { "epoch": 0.04027788619155413, "grad_norm": 0.8765490055084229, "learning_rate": 5e-05, "loss": 0.26, "step": 129 }, { "epoch": 0.040590117867457656, "grad_norm": 0.874072253704071, "learning_rate": 5e-05, "loss": 0.2564, "step": 130 }, { "epoch": 0.04090234954336117, "grad_norm": 0.7847826480865479, "learning_rate": 5e-05, "loss": 0.2422, "step": 131 }, { "epoch": 0.0412145812192647, "grad_norm": 0.8156128525733948, "learning_rate": 5e-05, "loss": 0.2402, "step": 132 }, { "epoch": 0.041526812895168214, "grad_norm": 0.8235909938812256, "learning_rate": 5e-05, "loss": 0.258, "step": 133 }, { "epoch": 0.04183904457107174, "grad_norm": 0.8224372863769531, "learning_rate": 5e-05, "loss": 0.2407, "step": 134 }, { "epoch": 0.042151276246975254, "grad_norm": 0.8447765707969666, "learning_rate": 5e-05, "loss": 0.2484, "step": 135 }, { "epoch": 0.04246350792287878, "grad_norm": 0.8105494976043701, "learning_rate": 5e-05, "loss": 0.2373, "step": 136 }, { "epoch": 0.042775739598782295, "grad_norm": 0.7966398596763611, "learning_rate": 5e-05, "loss": 0.2382, "step": 137 }, { "epoch": 0.04308797127468582, "grad_norm": 0.755123496055603, "learning_rate": 5e-05, "loss": 0.2189, "step": 138 }, { "epoch": 0.043400202950589335, "grad_norm": 0.869744598865509, "learning_rate": 5e-05, "loss": 0.2638, "step": 139 }, { "epoch": 0.04371243462649286, "grad_norm": 0.7719473838806152, "learning_rate": 5e-05, "loss": 0.2439, "step": 140 }, { "epoch": 0.044024666302396376, "grad_norm": 0.8012150526046753, "learning_rate": 5e-05, "loss": 0.259, "step": 141 }, { "epoch": 0.0443368979782999, "grad_norm": 0.8140265941619873, "learning_rate": 5e-05, "loss": 0.2257, "step": 142 }, { "epoch": 0.044649129654203416, "grad_norm": 0.8300631642341614, "learning_rate": 5e-05, "loss": 0.2367, "step": 143 }, { "epoch": 0.04496136133010694, "grad_norm": 0.7956774830818176, "learning_rate": 5e-05, "loss": 0.2479, "step": 144 }, { "epoch": 0.04527359300601046, "grad_norm": 0.7956410646438599, "learning_rate": 5e-05, "loss": 0.2524, "step": 145 }, { "epoch": 0.04558582468191398, "grad_norm": 0.8027469515800476, "learning_rate": 5e-05, "loss": 0.2284, "step": 146 }, { "epoch": 0.0458980563578175, "grad_norm": 0.7721954584121704, "learning_rate": 5e-05, "loss": 0.2322, "step": 147 }, { "epoch": 0.04621028803372102, "grad_norm": 0.7992778420448303, "learning_rate": 5e-05, "loss": 0.2301, "step": 148 }, { "epoch": 0.04652251970962454, "grad_norm": 0.8082111477851868, "learning_rate": 5e-05, "loss": 0.2391, "step": 149 }, { "epoch": 0.04683475138552806, "grad_norm": 0.7768813967704773, "learning_rate": 5e-05, "loss": 0.2387, "step": 150 }, { "epoch": 0.047146983061431585, "grad_norm": 0.7784020900726318, "learning_rate": 5e-05, "loss": 0.2296, "step": 151 }, { "epoch": 0.0474592147373351, "grad_norm": 0.812147855758667, "learning_rate": 5e-05, "loss": 0.2366, "step": 152 }, { "epoch": 0.047771446413238626, "grad_norm": 0.858238697052002, "learning_rate": 5e-05, "loss": 0.2394, "step": 153 }, { "epoch": 0.04808367808914214, "grad_norm": 0.8130385279655457, "learning_rate": 5e-05, "loss": 0.24, "step": 154 }, { "epoch": 0.04839590976504567, "grad_norm": 0.8460467457771301, "learning_rate": 5e-05, "loss": 0.2673, "step": 155 }, { "epoch": 0.04870814144094918, "grad_norm": 0.8528612852096558, "learning_rate": 5e-05, "loss": 0.2527, "step": 156 }, { "epoch": 0.04902037311685271, "grad_norm": 0.7896693348884583, "learning_rate": 5e-05, "loss": 0.233, "step": 157 }, { "epoch": 0.049332604792756224, "grad_norm": 0.7992597818374634, "learning_rate": 5e-05, "loss": 0.2493, "step": 158 }, { "epoch": 0.04964483646865975, "grad_norm": 0.7769232392311096, "learning_rate": 5e-05, "loss": 0.2293, "step": 159 }, { "epoch": 0.049957068144563264, "grad_norm": 0.7903576493263245, "learning_rate": 5e-05, "loss": 0.2474, "step": 160 }, { "epoch": 0.05026929982046679, "grad_norm": 0.7604594230651855, "learning_rate": 5e-05, "loss": 0.2338, "step": 161 }, { "epoch": 0.050581531496370305, "grad_norm": 0.7907264828681946, "learning_rate": 5e-05, "loss": 0.2472, "step": 162 }, { "epoch": 0.05089376317227383, "grad_norm": 0.7807927131652832, "learning_rate": 5e-05, "loss": 0.2397, "step": 163 }, { "epoch": 0.051205994848177346, "grad_norm": 0.7773512005805969, "learning_rate": 5e-05, "loss": 0.2499, "step": 164 }, { "epoch": 0.05151822652408087, "grad_norm": 0.7451511025428772, "learning_rate": 5e-05, "loss": 0.2253, "step": 165 }, { "epoch": 0.051830458199984386, "grad_norm": 0.7733394503593445, "learning_rate": 5e-05, "loss": 0.2147, "step": 166 }, { "epoch": 0.05214268987588791, "grad_norm": 0.7809682488441467, "learning_rate": 5e-05, "loss": 0.2439, "step": 167 }, { "epoch": 0.05245492155179143, "grad_norm": 0.779848039150238, "learning_rate": 5e-05, "loss": 0.2309, "step": 168 }, { "epoch": 0.05276715322769495, "grad_norm": 0.786044716835022, "learning_rate": 5e-05, "loss": 0.2394, "step": 169 }, { "epoch": 0.05307938490359847, "grad_norm": 0.7808971405029297, "learning_rate": 5e-05, "loss": 0.2342, "step": 170 }, { "epoch": 0.05339161657950199, "grad_norm": 0.7769681215286255, "learning_rate": 5e-05, "loss": 0.2276, "step": 171 }, { "epoch": 0.05370384825540551, "grad_norm": 0.8237590789794922, "learning_rate": 5e-05, "loss": 0.2479, "step": 172 }, { "epoch": 0.05401607993130903, "grad_norm": 0.781011700630188, "learning_rate": 5e-05, "loss": 0.2449, "step": 173 }, { "epoch": 0.05432831160721255, "grad_norm": 0.8094984292984009, "learning_rate": 5e-05, "loss": 0.2469, "step": 174 }, { "epoch": 0.05464054328311607, "grad_norm": 0.8182822465896606, "learning_rate": 5e-05, "loss": 0.2568, "step": 175 }, { "epoch": 0.054952774959019596, "grad_norm": 0.7729642391204834, "learning_rate": 5e-05, "loss": 0.2414, "step": 176 }, { "epoch": 0.05526500663492311, "grad_norm": 0.7684212327003479, "learning_rate": 5e-05, "loss": 0.2253, "step": 177 }, { "epoch": 0.055577238310826636, "grad_norm": 0.784989058971405, "learning_rate": 5e-05, "loss": 0.2393, "step": 178 }, { "epoch": 0.05588946998673015, "grad_norm": 0.7806195616722107, "learning_rate": 5e-05, "loss": 0.2235, "step": 179 }, { "epoch": 0.05620170166263368, "grad_norm": 0.8044819831848145, "learning_rate": 5e-05, "loss": 0.2493, "step": 180 }, { "epoch": 0.056513933338537194, "grad_norm": 0.7622436881065369, "learning_rate": 5e-05, "loss": 0.24, "step": 181 }, { "epoch": 0.05682616501444072, "grad_norm": 0.7646917700767517, "learning_rate": 5e-05, "loss": 0.2289, "step": 182 }, { "epoch": 0.057138396690344234, "grad_norm": 0.7814666628837585, "learning_rate": 5e-05, "loss": 0.2247, "step": 183 }, { "epoch": 0.05745062836624776, "grad_norm": 0.7709435820579529, "learning_rate": 5e-05, "loss": 0.2346, "step": 184 }, { "epoch": 0.057762860042151275, "grad_norm": 0.7891873717308044, "learning_rate": 5e-05, "loss": 0.2485, "step": 185 }, { "epoch": 0.0580750917180548, "grad_norm": 0.7606446743011475, "learning_rate": 5e-05, "loss": 0.2282, "step": 186 }, { "epoch": 0.058387323393958315, "grad_norm": 0.8126403093338013, "learning_rate": 5e-05, "loss": 0.2388, "step": 187 }, { "epoch": 0.05869955506986184, "grad_norm": 0.7814178466796875, "learning_rate": 5e-05, "loss": 0.2549, "step": 188 }, { "epoch": 0.059011786745765356, "grad_norm": 0.7404866814613342, "learning_rate": 5e-05, "loss": 0.2297, "step": 189 }, { "epoch": 0.05932401842166888, "grad_norm": 0.7703705430030823, "learning_rate": 5e-05, "loss": 0.2311, "step": 190 }, { "epoch": 0.059636250097572396, "grad_norm": 0.7442575693130493, "learning_rate": 5e-05, "loss": 0.2398, "step": 191 }, { "epoch": 0.05994848177347592, "grad_norm": 0.8157030344009399, "learning_rate": 5e-05, "loss": 0.2489, "step": 192 }, { "epoch": 0.06026071344937944, "grad_norm": 0.7885603904724121, "learning_rate": 5e-05, "loss": 0.2512, "step": 193 }, { "epoch": 0.06057294512528296, "grad_norm": 0.7747854590415955, "learning_rate": 5e-05, "loss": 0.2453, "step": 194 }, { "epoch": 0.06088517680118648, "grad_norm": 0.7648340463638306, "learning_rate": 5e-05, "loss": 0.2271, "step": 195 }, { "epoch": 0.06119740847709, "grad_norm": 0.8172813653945923, "learning_rate": 5e-05, "loss": 0.2416, "step": 196 }, { "epoch": 0.06150964015299352, "grad_norm": 0.7287509441375732, "learning_rate": 5e-05, "loss": 0.2449, "step": 197 }, { "epoch": 0.06182187182889704, "grad_norm": 0.7789535522460938, "learning_rate": 5e-05, "loss": 0.24, "step": 198 }, { "epoch": 0.06213410350480056, "grad_norm": 0.8249151110649109, "learning_rate": 5e-05, "loss": 0.2375, "step": 199 }, { "epoch": 0.06244633518070408, "grad_norm": 0.7668337225914001, "learning_rate": 5e-05, "loss": 0.2496, "step": 200 }, { "epoch": 0.0627585668566076, "grad_norm": 0.7561071515083313, "learning_rate": 5e-05, "loss": 0.2431, "step": 201 }, { "epoch": 0.06307079853251113, "grad_norm": 0.7412956953048706, "learning_rate": 5e-05, "loss": 0.2361, "step": 202 }, { "epoch": 0.06338303020841464, "grad_norm": 0.7701509594917297, "learning_rate": 5e-05, "loss": 0.237, "step": 203 }, { "epoch": 0.06369526188431816, "grad_norm": 0.7646925449371338, "learning_rate": 5e-05, "loss": 0.2316, "step": 204 }, { "epoch": 0.06400749356022169, "grad_norm": 0.7612301707267761, "learning_rate": 5e-05, "loss": 0.2468, "step": 205 }, { "epoch": 0.06431972523612521, "grad_norm": 0.7722594738006592, "learning_rate": 5e-05, "loss": 0.2266, "step": 206 }, { "epoch": 0.06463195691202872, "grad_norm": 0.8034259676933289, "learning_rate": 5e-05, "loss": 0.2534, "step": 207 }, { "epoch": 0.06494418858793224, "grad_norm": 0.843703031539917, "learning_rate": 5e-05, "loss": 0.2732, "step": 208 }, { "epoch": 0.06525642026383577, "grad_norm": 0.7067863345146179, "learning_rate": 5e-05, "loss": 0.2212, "step": 209 }, { "epoch": 0.06556865193973929, "grad_norm": 0.8069685697555542, "learning_rate": 5e-05, "loss": 0.2394, "step": 210 }, { "epoch": 0.0658808836156428, "grad_norm": 0.7935125231742859, "learning_rate": 5e-05, "loss": 0.2514, "step": 211 }, { "epoch": 0.06619311529154633, "grad_norm": 0.7550068497657776, "learning_rate": 5e-05, "loss": 0.2395, "step": 212 }, { "epoch": 0.06650534696744985, "grad_norm": 0.745366632938385, "learning_rate": 5e-05, "loss": 0.223, "step": 213 }, { "epoch": 0.06681757864335337, "grad_norm": 0.7638828158378601, "learning_rate": 5e-05, "loss": 0.2322, "step": 214 }, { "epoch": 0.06712981031925688, "grad_norm": 0.7647759318351746, "learning_rate": 5e-05, "loss": 0.2338, "step": 215 }, { "epoch": 0.0674420419951604, "grad_norm": 0.7917117476463318, "learning_rate": 5e-05, "loss": 0.2443, "step": 216 }, { "epoch": 0.06775427367106393, "grad_norm": 0.7291285395622253, "learning_rate": 5e-05, "loss": 0.2376, "step": 217 }, { "epoch": 0.06806650534696745, "grad_norm": 0.7071493268013, "learning_rate": 5e-05, "loss": 0.2258, "step": 218 }, { "epoch": 0.06837873702287096, "grad_norm": 0.7396230697631836, "learning_rate": 5e-05, "loss": 0.2356, "step": 219 }, { "epoch": 0.06869096869877449, "grad_norm": 0.7676906585693359, "learning_rate": 5e-05, "loss": 0.2556, "step": 220 }, { "epoch": 0.06900320037467801, "grad_norm": 0.725475549697876, "learning_rate": 5e-05, "loss": 0.2256, "step": 221 }, { "epoch": 0.06931543205058154, "grad_norm": 0.8201805353164673, "learning_rate": 5e-05, "loss": 0.2523, "step": 222 }, { "epoch": 0.06962766372648505, "grad_norm": 0.8155292868614197, "learning_rate": 5e-05, "loss": 0.2495, "step": 223 }, { "epoch": 0.06993989540238857, "grad_norm": 0.7480824589729309, "learning_rate": 5e-05, "loss": 0.241, "step": 224 }, { "epoch": 0.07025212707829209, "grad_norm": 0.7418039441108704, "learning_rate": 5e-05, "loss": 0.2192, "step": 225 }, { "epoch": 0.07056435875419562, "grad_norm": 0.796464204788208, "learning_rate": 5e-05, "loss": 0.2411, "step": 226 }, { "epoch": 0.07087659043009914, "grad_norm": 0.7993951439857483, "learning_rate": 5e-05, "loss": 0.2463, "step": 227 }, { "epoch": 0.07118882210600265, "grad_norm": 0.720698893070221, "learning_rate": 5e-05, "loss": 0.2256, "step": 228 }, { "epoch": 0.07150105378190617, "grad_norm": 0.7721514701843262, "learning_rate": 5e-05, "loss": 0.2311, "step": 229 }, { "epoch": 0.0718132854578097, "grad_norm": 0.7636629343032837, "learning_rate": 5e-05, "loss": 0.2412, "step": 230 }, { "epoch": 0.07212551713371322, "grad_norm": 0.7318843603134155, "learning_rate": 5e-05, "loss": 0.2352, "step": 231 }, { "epoch": 0.07243774880961673, "grad_norm": 0.7422648072242737, "learning_rate": 5e-05, "loss": 0.2464, "step": 232 }, { "epoch": 0.07274998048552025, "grad_norm": 0.8024821281433105, "learning_rate": 5e-05, "loss": 0.2337, "step": 233 }, { "epoch": 0.07306221216142378, "grad_norm": 0.7292082905769348, "learning_rate": 5e-05, "loss": 0.2307, "step": 234 }, { "epoch": 0.0733744438373273, "grad_norm": 0.6996471285820007, "learning_rate": 5e-05, "loss": 0.2447, "step": 235 }, { "epoch": 0.07368667551323081, "grad_norm": 0.7467811703681946, "learning_rate": 5e-05, "loss": 0.2176, "step": 236 }, { "epoch": 0.07399890718913434, "grad_norm": 0.7504345774650574, "learning_rate": 5e-05, "loss": 0.2273, "step": 237 }, { "epoch": 0.07431113886503786, "grad_norm": 0.719676673412323, "learning_rate": 5e-05, "loss": 0.2201, "step": 238 }, { "epoch": 0.07462337054094138, "grad_norm": 0.7685948014259338, "learning_rate": 5e-05, "loss": 0.2363, "step": 239 }, { "epoch": 0.0749356022168449, "grad_norm": 0.8184857964515686, "learning_rate": 5e-05, "loss": 0.2433, "step": 240 }, { "epoch": 0.07524783389274842, "grad_norm": 0.8186751008033752, "learning_rate": 5e-05, "loss": 0.2517, "step": 241 }, { "epoch": 0.07556006556865194, "grad_norm": 0.6925832629203796, "learning_rate": 5e-05, "loss": 0.2065, "step": 242 }, { "epoch": 0.07587229724455546, "grad_norm": 0.7406442165374756, "learning_rate": 5e-05, "loss": 0.222, "step": 243 }, { "epoch": 0.07618452892045897, "grad_norm": 0.8030349016189575, "learning_rate": 5e-05, "loss": 0.2569, "step": 244 }, { "epoch": 0.0764967605963625, "grad_norm": 0.7676234245300293, "learning_rate": 5e-05, "loss": 0.2523, "step": 245 }, { "epoch": 0.07680899227226602, "grad_norm": 0.7622032761573792, "learning_rate": 5e-05, "loss": 0.2379, "step": 246 }, { "epoch": 0.07712122394816955, "grad_norm": 0.7631667256355286, "learning_rate": 5e-05, "loss": 0.2265, "step": 247 }, { "epoch": 0.07743345562407306, "grad_norm": 0.7795726656913757, "learning_rate": 5e-05, "loss": 0.2466, "step": 248 }, { "epoch": 0.07774568729997658, "grad_norm": 0.7595590353012085, "learning_rate": 5e-05, "loss": 0.2353, "step": 249 }, { "epoch": 0.0780579189758801, "grad_norm": 0.7386550307273865, "learning_rate": 5e-05, "loss": 0.2484, "step": 250 }, { "epoch": 0.07837015065178363, "grad_norm": 0.7970679998397827, "learning_rate": 5e-05, "loss": 0.2478, "step": 251 }, { "epoch": 0.07868238232768715, "grad_norm": 0.7414926290512085, "learning_rate": 5e-05, "loss": 0.2423, "step": 252 }, { "epoch": 0.07899461400359066, "grad_norm": 0.7165507674217224, "learning_rate": 5e-05, "loss": 0.2168, "step": 253 }, { "epoch": 0.07930684567949418, "grad_norm": 0.7429415583610535, "learning_rate": 5e-05, "loss": 0.2301, "step": 254 }, { "epoch": 0.07961907735539771, "grad_norm": 0.7714077830314636, "learning_rate": 5e-05, "loss": 0.2374, "step": 255 }, { "epoch": 0.07993130903130123, "grad_norm": 0.7683895230293274, "learning_rate": 5e-05, "loss": 0.2591, "step": 256 }, { "epoch": 0.08024354070720474, "grad_norm": 0.7994827032089233, "learning_rate": 5e-05, "loss": 0.2536, "step": 257 }, { "epoch": 0.08055577238310827, "grad_norm": 0.7642600536346436, "learning_rate": 5e-05, "loss": 0.2404, "step": 258 }, { "epoch": 0.08086800405901179, "grad_norm": 0.760262668132782, "learning_rate": 5e-05, "loss": 0.2478, "step": 259 }, { "epoch": 0.08118023573491531, "grad_norm": 0.7440392374992371, "learning_rate": 5e-05, "loss": 0.2477, "step": 260 }, { "epoch": 0.08149246741081882, "grad_norm": 0.7709441184997559, "learning_rate": 5e-05, "loss": 0.2451, "step": 261 }, { "epoch": 0.08180469908672235, "grad_norm": 0.7596651315689087, "learning_rate": 5e-05, "loss": 0.228, "step": 262 }, { "epoch": 0.08211693076262587, "grad_norm": 0.7565099000930786, "learning_rate": 5e-05, "loss": 0.2351, "step": 263 }, { "epoch": 0.0824291624385294, "grad_norm": 0.7472636103630066, "learning_rate": 5e-05, "loss": 0.2395, "step": 264 }, { "epoch": 0.0827413941144329, "grad_norm": 0.7651402950286865, "learning_rate": 5e-05, "loss": 0.2388, "step": 265 }, { "epoch": 0.08305362579033643, "grad_norm": 0.7800264358520508, "learning_rate": 5e-05, "loss": 0.2419, "step": 266 }, { "epoch": 0.08336585746623995, "grad_norm": 0.781078040599823, "learning_rate": 5e-05, "loss": 0.2425, "step": 267 }, { "epoch": 0.08367808914214347, "grad_norm": 0.7541010975837708, "learning_rate": 5e-05, "loss": 0.244, "step": 268 }, { "epoch": 0.08399032081804698, "grad_norm": 0.747235119342804, "learning_rate": 5e-05, "loss": 0.2347, "step": 269 }, { "epoch": 0.08430255249395051, "grad_norm": 0.731549084186554, "learning_rate": 5e-05, "loss": 0.2239, "step": 270 }, { "epoch": 0.08461478416985403, "grad_norm": 0.7016183733940125, "learning_rate": 5e-05, "loss": 0.2082, "step": 271 }, { "epoch": 0.08492701584575756, "grad_norm": 0.7401699423789978, "learning_rate": 5e-05, "loss": 0.2261, "step": 272 }, { "epoch": 0.08523924752166107, "grad_norm": 0.756777822971344, "learning_rate": 5e-05, "loss": 0.229, "step": 273 }, { "epoch": 0.08555147919756459, "grad_norm": 0.746023952960968, "learning_rate": 5e-05, "loss": 0.2548, "step": 274 }, { "epoch": 0.08586371087346811, "grad_norm": 0.7566107511520386, "learning_rate": 5e-05, "loss": 0.2255, "step": 275 }, { "epoch": 0.08617594254937164, "grad_norm": 0.7458581328392029, "learning_rate": 5e-05, "loss": 0.2502, "step": 276 }, { "epoch": 0.08648817422527516, "grad_norm": 0.7535874247550964, "learning_rate": 5e-05, "loss": 0.2296, "step": 277 }, { "epoch": 0.08680040590117867, "grad_norm": 0.8019295930862427, "learning_rate": 5e-05, "loss": 0.2556, "step": 278 }, { "epoch": 0.0871126375770822, "grad_norm": 0.740929901599884, "learning_rate": 5e-05, "loss": 0.2345, "step": 279 }, { "epoch": 0.08742486925298572, "grad_norm": 0.7502465844154358, "learning_rate": 5e-05, "loss": 0.2387, "step": 280 }, { "epoch": 0.08773710092888924, "grad_norm": 0.7453829050064087, "learning_rate": 5e-05, "loss": 0.2412, "step": 281 }, { "epoch": 0.08804933260479275, "grad_norm": 0.7797332406044006, "learning_rate": 5e-05, "loss": 0.2471, "step": 282 }, { "epoch": 0.08836156428069628, "grad_norm": 0.7122307419776917, "learning_rate": 5e-05, "loss": 0.2271, "step": 283 }, { "epoch": 0.0886737959565998, "grad_norm": 0.72865229845047, "learning_rate": 5e-05, "loss": 0.2177, "step": 284 }, { "epoch": 0.08898602763250332, "grad_norm": 0.7191866040229797, "learning_rate": 5e-05, "loss": 0.2215, "step": 285 }, { "epoch": 0.08929825930840683, "grad_norm": 0.7670174241065979, "learning_rate": 5e-05, "loss": 0.2174, "step": 286 }, { "epoch": 0.08961049098431036, "grad_norm": 0.7428281307220459, "learning_rate": 5e-05, "loss": 0.2368, "step": 287 }, { "epoch": 0.08992272266021388, "grad_norm": 0.77579665184021, "learning_rate": 5e-05, "loss": 0.2481, "step": 288 }, { "epoch": 0.0902349543361174, "grad_norm": 0.7313116192817688, "learning_rate": 5e-05, "loss": 0.2346, "step": 289 }, { "epoch": 0.09054718601202091, "grad_norm": 0.7349814772605896, "learning_rate": 5e-05, "loss": 0.2263, "step": 290 }, { "epoch": 0.09085941768792444, "grad_norm": 0.7555976510047913, "learning_rate": 5e-05, "loss": 0.2373, "step": 291 }, { "epoch": 0.09117164936382796, "grad_norm": 0.8292303681373596, "learning_rate": 5e-05, "loss": 0.2722, "step": 292 }, { "epoch": 0.09148388103973148, "grad_norm": 0.7319760918617249, "learning_rate": 5e-05, "loss": 0.2332, "step": 293 }, { "epoch": 0.091796112715635, "grad_norm": 0.7426495552062988, "learning_rate": 5e-05, "loss": 0.2394, "step": 294 }, { "epoch": 0.09210834439153852, "grad_norm": 0.721731960773468, "learning_rate": 5e-05, "loss": 0.2333, "step": 295 }, { "epoch": 0.09242057606744204, "grad_norm": 0.7624607682228088, "learning_rate": 5e-05, "loss": 0.2329, "step": 296 }, { "epoch": 0.09273280774334557, "grad_norm": 0.7526171803474426, "learning_rate": 5e-05, "loss": 0.2433, "step": 297 }, { "epoch": 0.09304503941924908, "grad_norm": 0.7415574193000793, "learning_rate": 5e-05, "loss": 0.2263, "step": 298 }, { "epoch": 0.0933572710951526, "grad_norm": 0.7495612502098083, "learning_rate": 5e-05, "loss": 0.2314, "step": 299 }, { "epoch": 0.09366950277105612, "grad_norm": 0.79558265209198, "learning_rate": 5e-05, "loss": 0.2587, "step": 300 }, { "epoch": 0.09398173444695965, "grad_norm": 0.7219292521476746, "learning_rate": 5e-05, "loss": 0.2398, "step": 301 }, { "epoch": 0.09429396612286317, "grad_norm": 0.7644978761672974, "learning_rate": 5e-05, "loss": 0.2434, "step": 302 }, { "epoch": 0.09460619779876668, "grad_norm": 0.7187155485153198, "learning_rate": 5e-05, "loss": 0.2343, "step": 303 }, { "epoch": 0.0949184294746702, "grad_norm": 0.7420233488082886, "learning_rate": 5e-05, "loss": 0.2242, "step": 304 }, { "epoch": 0.09523066115057373, "grad_norm": 0.7315407395362854, "learning_rate": 5e-05, "loss": 0.2477, "step": 305 }, { "epoch": 0.09554289282647725, "grad_norm": 0.7031078934669495, "learning_rate": 5e-05, "loss": 0.2346, "step": 306 }, { "epoch": 0.09585512450238076, "grad_norm": 0.7478947639465332, "learning_rate": 5e-05, "loss": 0.2457, "step": 307 }, { "epoch": 0.09616735617828429, "grad_norm": 0.7416651844978333, "learning_rate": 5e-05, "loss": 0.2413, "step": 308 }, { "epoch": 0.09647958785418781, "grad_norm": 0.7390679121017456, "learning_rate": 5e-05, "loss": 0.2453, "step": 309 }, { "epoch": 0.09679181953009133, "grad_norm": 0.7336480617523193, "learning_rate": 5e-05, "loss": 0.2392, "step": 310 }, { "epoch": 0.09710405120599484, "grad_norm": 0.7331861257553101, "learning_rate": 5e-05, "loss": 0.2197, "step": 311 }, { "epoch": 0.09741628288189837, "grad_norm": 0.7856714725494385, "learning_rate": 5e-05, "loss": 0.2582, "step": 312 }, { "epoch": 0.09772851455780189, "grad_norm": 0.7152271866798401, "learning_rate": 5e-05, "loss": 0.2353, "step": 313 }, { "epoch": 0.09804074623370541, "grad_norm": 0.7524024248123169, "learning_rate": 5e-05, "loss": 0.2389, "step": 314 }, { "epoch": 0.09835297790960892, "grad_norm": 0.7539529204368591, "learning_rate": 5e-05, "loss": 0.2308, "step": 315 }, { "epoch": 0.09866520958551245, "grad_norm": 0.7707381248474121, "learning_rate": 5e-05, "loss": 0.2419, "step": 316 }, { "epoch": 0.09897744126141597, "grad_norm": 0.6765215396881104, "learning_rate": 5e-05, "loss": 0.1992, "step": 317 }, { "epoch": 0.0992896729373195, "grad_norm": 0.7512863874435425, "learning_rate": 5e-05, "loss": 0.2456, "step": 318 }, { "epoch": 0.099601904613223, "grad_norm": 0.7190133929252625, "learning_rate": 5e-05, "loss": 0.2461, "step": 319 }, { "epoch": 0.09991413628912653, "grad_norm": 0.7536815404891968, "learning_rate": 5e-05, "loss": 0.2414, "step": 320 }, { "epoch": 0.10022636796503005, "grad_norm": 0.7573649883270264, "learning_rate": 5e-05, "loss": 0.2473, "step": 321 }, { "epoch": 0.10053859964093358, "grad_norm": 0.7137243747711182, "learning_rate": 5e-05, "loss": 0.2267, "step": 322 }, { "epoch": 0.10085083131683709, "grad_norm": 0.756658136844635, "learning_rate": 5e-05, "loss": 0.2327, "step": 323 }, { "epoch": 0.10116306299274061, "grad_norm": 0.7320883274078369, "learning_rate": 5e-05, "loss": 0.2324, "step": 324 }, { "epoch": 0.10147529466864413, "grad_norm": 0.7737743258476257, "learning_rate": 5e-05, "loss": 0.2504, "step": 325 }, { "epoch": 0.10178752634454766, "grad_norm": 0.740436315536499, "learning_rate": 5e-05, "loss": 0.2603, "step": 326 }, { "epoch": 0.10209975802045118, "grad_norm": 0.7749807834625244, "learning_rate": 5e-05, "loss": 0.2491, "step": 327 }, { "epoch": 0.10241198969635469, "grad_norm": 0.7144931554794312, "learning_rate": 5e-05, "loss": 0.223, "step": 328 }, { "epoch": 0.10272422137225821, "grad_norm": 0.7270709872245789, "learning_rate": 5e-05, "loss": 0.2258, "step": 329 }, { "epoch": 0.10303645304816174, "grad_norm": 0.7936622500419617, "learning_rate": 5e-05, "loss": 0.2471, "step": 330 }, { "epoch": 0.10334868472406526, "grad_norm": 0.7226642370223999, "learning_rate": 5e-05, "loss": 0.2422, "step": 331 }, { "epoch": 0.10366091639996877, "grad_norm": 0.6965216994285583, "learning_rate": 5e-05, "loss": 0.2277, "step": 332 }, { "epoch": 0.1039731480758723, "grad_norm": 0.6971994638442993, "learning_rate": 5e-05, "loss": 0.2307, "step": 333 }, { "epoch": 0.10428537975177582, "grad_norm": 0.7614617347717285, "learning_rate": 5e-05, "loss": 0.2577, "step": 334 }, { "epoch": 0.10459761142767934, "grad_norm": 0.7710132002830505, "learning_rate": 5e-05, "loss": 0.2397, "step": 335 }, { "epoch": 0.10490984310358285, "grad_norm": 0.7575166821479797, "learning_rate": 5e-05, "loss": 0.2372, "step": 336 }, { "epoch": 0.10522207477948638, "grad_norm": 0.7122974991798401, "learning_rate": 5e-05, "loss": 0.2322, "step": 337 }, { "epoch": 0.1055343064553899, "grad_norm": 0.7216007113456726, "learning_rate": 5e-05, "loss": 0.2368, "step": 338 }, { "epoch": 0.10584653813129342, "grad_norm": 0.6928422451019287, "learning_rate": 5e-05, "loss": 0.2207, "step": 339 }, { "epoch": 0.10615876980719693, "grad_norm": 0.7032759189605713, "learning_rate": 5e-05, "loss": 0.2236, "step": 340 }, { "epoch": 0.10647100148310046, "grad_norm": 0.7022131085395813, "learning_rate": 5e-05, "loss": 0.2192, "step": 341 }, { "epoch": 0.10678323315900398, "grad_norm": 0.7579840421676636, "learning_rate": 5e-05, "loss": 0.2554, "step": 342 }, { "epoch": 0.1070954648349075, "grad_norm": 0.7317826747894287, "learning_rate": 5e-05, "loss": 0.2323, "step": 343 }, { "epoch": 0.10740769651081102, "grad_norm": 0.7495226263999939, "learning_rate": 5e-05, "loss": 0.2486, "step": 344 }, { "epoch": 0.10771992818671454, "grad_norm": 0.7224180102348328, "learning_rate": 5e-05, "loss": 0.2403, "step": 345 }, { "epoch": 0.10803215986261806, "grad_norm": 0.7131124138832092, "learning_rate": 5e-05, "loss": 0.2343, "step": 346 }, { "epoch": 0.10834439153852159, "grad_norm": 0.6894903182983398, "learning_rate": 5e-05, "loss": 0.2345, "step": 347 }, { "epoch": 0.1086566232144251, "grad_norm": 0.7144549489021301, "learning_rate": 5e-05, "loss": 0.2226, "step": 348 }, { "epoch": 0.10896885489032862, "grad_norm": 0.7960677742958069, "learning_rate": 5e-05, "loss": 0.2571, "step": 349 }, { "epoch": 0.10928108656623214, "grad_norm": 0.7274746298789978, "learning_rate": 5e-05, "loss": 0.2395, "step": 350 }, { "epoch": 0.10959331824213567, "grad_norm": 0.7247001528739929, "learning_rate": 5e-05, "loss": 0.2274, "step": 351 }, { "epoch": 0.10990554991803919, "grad_norm": 0.7061532735824585, "learning_rate": 5e-05, "loss": 0.2311, "step": 352 }, { "epoch": 0.1102177815939427, "grad_norm": 0.7610965967178345, "learning_rate": 5e-05, "loss": 0.2516, "step": 353 }, { "epoch": 0.11053001326984623, "grad_norm": 0.7681392431259155, "learning_rate": 5e-05, "loss": 0.2249, "step": 354 }, { "epoch": 0.11084224494574975, "grad_norm": 0.6864320039749146, "learning_rate": 5e-05, "loss": 0.2352, "step": 355 }, { "epoch": 0.11115447662165327, "grad_norm": 0.7186320424079895, "learning_rate": 5e-05, "loss": 0.2351, "step": 356 }, { "epoch": 0.11146670829755678, "grad_norm": 0.6994982361793518, "learning_rate": 5e-05, "loss": 0.2159, "step": 357 }, { "epoch": 0.1117789399734603, "grad_norm": 0.730311393737793, "learning_rate": 5e-05, "loss": 0.2447, "step": 358 }, { "epoch": 0.11209117164936383, "grad_norm": 0.7018543481826782, "learning_rate": 5e-05, "loss": 0.2412, "step": 359 }, { "epoch": 0.11240340332526735, "grad_norm": 0.7057743072509766, "learning_rate": 5e-05, "loss": 0.234, "step": 360 }, { "epoch": 0.11271563500117086, "grad_norm": 0.7452568411827087, "learning_rate": 5e-05, "loss": 0.2424, "step": 361 }, { "epoch": 0.11302786667707439, "grad_norm": 0.6739805340766907, "learning_rate": 5e-05, "loss": 0.2224, "step": 362 }, { "epoch": 0.11334009835297791, "grad_norm": 0.702228844165802, "learning_rate": 5e-05, "loss": 0.2349, "step": 363 }, { "epoch": 0.11365233002888143, "grad_norm": 0.7560391426086426, "learning_rate": 5e-05, "loss": 0.2316, "step": 364 }, { "epoch": 0.11396456170478494, "grad_norm": 0.7688949108123779, "learning_rate": 5e-05, "loss": 0.242, "step": 365 }, { "epoch": 0.11427679338068847, "grad_norm": 0.6995238661766052, "learning_rate": 5e-05, "loss": 0.2194, "step": 366 }, { "epoch": 0.11458902505659199, "grad_norm": 0.7103112936019897, "learning_rate": 5e-05, "loss": 0.2323, "step": 367 }, { "epoch": 0.11490125673249552, "grad_norm": 0.7261192202568054, "learning_rate": 5e-05, "loss": 0.2206, "step": 368 }, { "epoch": 0.11521348840839903, "grad_norm": 0.7459685802459717, "learning_rate": 5e-05, "loss": 0.2491, "step": 369 }, { "epoch": 0.11552572008430255, "grad_norm": 0.7673231959342957, "learning_rate": 5e-05, "loss": 0.227, "step": 370 }, { "epoch": 0.11583795176020607, "grad_norm": 0.7382850646972656, "learning_rate": 5e-05, "loss": 0.2306, "step": 371 }, { "epoch": 0.1161501834361096, "grad_norm": 0.8083086609840393, "learning_rate": 5e-05, "loss": 0.2727, "step": 372 }, { "epoch": 0.1164624151120131, "grad_norm": 0.7125214338302612, "learning_rate": 5e-05, "loss": 0.2364, "step": 373 }, { "epoch": 0.11677464678791663, "grad_norm": 0.771496057510376, "learning_rate": 5e-05, "loss": 0.2289, "step": 374 }, { "epoch": 0.11708687846382015, "grad_norm": 0.6791495680809021, "learning_rate": 5e-05, "loss": 0.2187, "step": 375 }, { "epoch": 0.11739911013972368, "grad_norm": 0.7387202382087708, "learning_rate": 5e-05, "loss": 0.2475, "step": 376 }, { "epoch": 0.1177113418156272, "grad_norm": 0.6935343742370605, "learning_rate": 5e-05, "loss": 0.2222, "step": 377 }, { "epoch": 0.11802357349153071, "grad_norm": 0.7236191630363464, "learning_rate": 5e-05, "loss": 0.2121, "step": 378 }, { "epoch": 0.11833580516743424, "grad_norm": 0.7310509085655212, "learning_rate": 5e-05, "loss": 0.2307, "step": 379 }, { "epoch": 0.11864803684333776, "grad_norm": 0.7252143025398254, "learning_rate": 5e-05, "loss": 0.2347, "step": 380 }, { "epoch": 0.11896026851924128, "grad_norm": 0.7602221369743347, "learning_rate": 5e-05, "loss": 0.2397, "step": 381 }, { "epoch": 0.11927250019514479, "grad_norm": 0.7441439032554626, "learning_rate": 5e-05, "loss": 0.2401, "step": 382 }, { "epoch": 0.11958473187104832, "grad_norm": 0.7487204670906067, "learning_rate": 5e-05, "loss": 0.259, "step": 383 }, { "epoch": 0.11989696354695184, "grad_norm": 0.6990290284156799, "learning_rate": 5e-05, "loss": 0.2298, "step": 384 }, { "epoch": 0.12020919522285536, "grad_norm": 0.6879537105560303, "learning_rate": 5e-05, "loss": 0.2182, "step": 385 }, { "epoch": 0.12052142689875887, "grad_norm": 0.7398568987846375, "learning_rate": 5e-05, "loss": 0.2383, "step": 386 }, { "epoch": 0.1208336585746624, "grad_norm": 0.7243762612342834, "learning_rate": 5e-05, "loss": 0.2389, "step": 387 }, { "epoch": 0.12114589025056592, "grad_norm": 0.7209830284118652, "learning_rate": 5e-05, "loss": 0.2309, "step": 388 }, { "epoch": 0.12145812192646945, "grad_norm": 0.6878407597541809, "learning_rate": 5e-05, "loss": 0.2259, "step": 389 }, { "epoch": 0.12177035360237295, "grad_norm": 0.7526346445083618, "learning_rate": 5e-05, "loss": 0.2429, "step": 390 }, { "epoch": 0.12208258527827648, "grad_norm": 0.748076856136322, "learning_rate": 5e-05, "loss": 0.2442, "step": 391 }, { "epoch": 0.12239481695418, "grad_norm": 0.7087879776954651, "learning_rate": 5e-05, "loss": 0.2487, "step": 392 }, { "epoch": 0.12270704863008353, "grad_norm": 0.7081765532493591, "learning_rate": 5e-05, "loss": 0.2378, "step": 393 }, { "epoch": 0.12301928030598704, "grad_norm": 0.6859790682792664, "learning_rate": 5e-05, "loss": 0.2097, "step": 394 }, { "epoch": 0.12333151198189056, "grad_norm": 0.7118050456047058, "learning_rate": 5e-05, "loss": 0.2401, "step": 395 }, { "epoch": 0.12364374365779408, "grad_norm": 0.7031238079071045, "learning_rate": 5e-05, "loss": 0.2347, "step": 396 }, { "epoch": 0.12395597533369761, "grad_norm": 0.747264564037323, "learning_rate": 5e-05, "loss": 0.2389, "step": 397 }, { "epoch": 0.12426820700960112, "grad_norm": 0.7136809825897217, "learning_rate": 5e-05, "loss": 0.2189, "step": 398 }, { "epoch": 0.12458043868550464, "grad_norm": 0.7547541260719299, "learning_rate": 5e-05, "loss": 0.2313, "step": 399 }, { "epoch": 0.12489267036140816, "grad_norm": 0.7315170168876648, "learning_rate": 5e-05, "loss": 0.2374, "step": 400 }, { "epoch": 0.1252049020373117, "grad_norm": 0.6908106207847595, "learning_rate": 5e-05, "loss": 0.2051, "step": 401 }, { "epoch": 0.1255171337132152, "grad_norm": 0.7567368149757385, "learning_rate": 5e-05, "loss": 0.2327, "step": 402 }, { "epoch": 0.12582936538911874, "grad_norm": 0.7322911024093628, "learning_rate": 5e-05, "loss": 0.231, "step": 403 }, { "epoch": 0.12614159706502226, "grad_norm": 0.7224327921867371, "learning_rate": 5e-05, "loss": 0.2242, "step": 404 }, { "epoch": 0.12645382874092576, "grad_norm": 0.7269308567047119, "learning_rate": 5e-05, "loss": 0.23, "step": 405 }, { "epoch": 0.12676606041682928, "grad_norm": 0.71408611536026, "learning_rate": 5e-05, "loss": 0.2448, "step": 406 }, { "epoch": 0.1270782920927328, "grad_norm": 0.7468221187591553, "learning_rate": 5e-05, "loss": 0.2247, "step": 407 }, { "epoch": 0.12739052376863633, "grad_norm": 0.7114073634147644, "learning_rate": 5e-05, "loss": 0.218, "step": 408 }, { "epoch": 0.12770275544453985, "grad_norm": 0.7086378931999207, "learning_rate": 5e-05, "loss": 0.2313, "step": 409 }, { "epoch": 0.12801498712044337, "grad_norm": 0.696743369102478, "learning_rate": 5e-05, "loss": 0.2442, "step": 410 }, { "epoch": 0.1283272187963469, "grad_norm": 0.7239034175872803, "learning_rate": 5e-05, "loss": 0.2325, "step": 411 }, { "epoch": 0.12863945047225042, "grad_norm": 0.7103134393692017, "learning_rate": 5e-05, "loss": 0.2157, "step": 412 }, { "epoch": 0.12895168214815392, "grad_norm": 0.656802773475647, "learning_rate": 5e-05, "loss": 0.2114, "step": 413 }, { "epoch": 0.12926391382405744, "grad_norm": 0.7135592103004456, "learning_rate": 5e-05, "loss": 0.2267, "step": 414 }, { "epoch": 0.12957614549996097, "grad_norm": 0.7357749342918396, "learning_rate": 5e-05, "loss": 0.2362, "step": 415 }, { "epoch": 0.1298883771758645, "grad_norm": 0.7628867030143738, "learning_rate": 5e-05, "loss": 0.2362, "step": 416 }, { "epoch": 0.130200608851768, "grad_norm": 0.722221314907074, "learning_rate": 5e-05, "loss": 0.24, "step": 417 }, { "epoch": 0.13051284052767154, "grad_norm": 0.7207236289978027, "learning_rate": 5e-05, "loss": 0.2454, "step": 418 }, { "epoch": 0.13082507220357506, "grad_norm": 0.7257908582687378, "learning_rate": 5e-05, "loss": 0.2239, "step": 419 }, { "epoch": 0.13113730387947858, "grad_norm": 0.7519184350967407, "learning_rate": 5e-05, "loss": 0.2592, "step": 420 }, { "epoch": 0.13144953555538208, "grad_norm": 0.7341853976249695, "learning_rate": 5e-05, "loss": 0.2369, "step": 421 }, { "epoch": 0.1317617672312856, "grad_norm": 0.7121981382369995, "learning_rate": 5e-05, "loss": 0.2346, "step": 422 }, { "epoch": 0.13207399890718913, "grad_norm": 0.7084437012672424, "learning_rate": 5e-05, "loss": 0.2439, "step": 423 }, { "epoch": 0.13238623058309265, "grad_norm": 0.7157992720603943, "learning_rate": 5e-05, "loss": 0.2291, "step": 424 }, { "epoch": 0.13269846225899617, "grad_norm": 0.7339060306549072, "learning_rate": 5e-05, "loss": 0.2299, "step": 425 }, { "epoch": 0.1330106939348997, "grad_norm": 0.7371565103530884, "learning_rate": 5e-05, "loss": 0.2458, "step": 426 }, { "epoch": 0.13332292561080322, "grad_norm": 0.7426894903182983, "learning_rate": 5e-05, "loss": 0.2294, "step": 427 }, { "epoch": 0.13363515728670675, "grad_norm": 0.6923856139183044, "learning_rate": 5e-05, "loss": 0.2344, "step": 428 }, { "epoch": 0.13394738896261027, "grad_norm": 0.7081928253173828, "learning_rate": 5e-05, "loss": 0.2262, "step": 429 }, { "epoch": 0.13425962063851377, "grad_norm": 0.7115813493728638, "learning_rate": 5e-05, "loss": 0.2382, "step": 430 }, { "epoch": 0.1345718523144173, "grad_norm": 0.7104718685150146, "learning_rate": 5e-05, "loss": 0.2282, "step": 431 }, { "epoch": 0.1348840839903208, "grad_norm": 0.7595230937004089, "learning_rate": 5e-05, "loss": 0.2423, "step": 432 }, { "epoch": 0.13519631566622434, "grad_norm": 0.7203737497329712, "learning_rate": 5e-05, "loss": 0.2472, "step": 433 }, { "epoch": 0.13550854734212786, "grad_norm": 0.7062506079673767, "learning_rate": 5e-05, "loss": 0.2276, "step": 434 }, { "epoch": 0.13582077901803138, "grad_norm": 0.6907111406326294, "learning_rate": 5e-05, "loss": 0.2267, "step": 435 }, { "epoch": 0.1361330106939349, "grad_norm": 0.6592497825622559, "learning_rate": 5e-05, "loss": 0.2138, "step": 436 }, { "epoch": 0.13644524236983843, "grad_norm": 0.7172386050224304, "learning_rate": 5e-05, "loss": 0.2399, "step": 437 }, { "epoch": 0.13675747404574193, "grad_norm": 0.7157382369041443, "learning_rate": 5e-05, "loss": 0.2145, "step": 438 }, { "epoch": 0.13706970572164545, "grad_norm": 0.7709649205207825, "learning_rate": 5e-05, "loss": 0.2224, "step": 439 }, { "epoch": 0.13738193739754898, "grad_norm": 0.714118242263794, "learning_rate": 5e-05, "loss": 0.2241, "step": 440 }, { "epoch": 0.1376941690734525, "grad_norm": 0.6757933497428894, "learning_rate": 5e-05, "loss": 0.2111, "step": 441 }, { "epoch": 0.13800640074935602, "grad_norm": 0.7393305897712708, "learning_rate": 5e-05, "loss": 0.2356, "step": 442 }, { "epoch": 0.13831863242525955, "grad_norm": 0.7170238494873047, "learning_rate": 5e-05, "loss": 0.2349, "step": 443 }, { "epoch": 0.13863086410116307, "grad_norm": 0.7143763303756714, "learning_rate": 5e-05, "loss": 0.2428, "step": 444 }, { "epoch": 0.1389430957770666, "grad_norm": 0.6513913869857788, "learning_rate": 5e-05, "loss": 0.2015, "step": 445 }, { "epoch": 0.1392553274529701, "grad_norm": 0.7162613868713379, "learning_rate": 5e-05, "loss": 0.2436, "step": 446 }, { "epoch": 0.13956755912887361, "grad_norm": 0.6594113111495972, "learning_rate": 5e-05, "loss": 0.2162, "step": 447 }, { "epoch": 0.13987979080477714, "grad_norm": 0.6931014657020569, "learning_rate": 5e-05, "loss": 0.2235, "step": 448 }, { "epoch": 0.14019202248068066, "grad_norm": 0.7784932255744934, "learning_rate": 5e-05, "loss": 0.2328, "step": 449 }, { "epoch": 0.14050425415658419, "grad_norm": 0.8310526013374329, "learning_rate": 5e-05, "loss": 0.2582, "step": 450 }, { "epoch": 0.1408164858324877, "grad_norm": 0.689359188079834, "learning_rate": 5e-05, "loss": 0.2234, "step": 451 }, { "epoch": 0.14112871750839123, "grad_norm": 0.7205532789230347, "learning_rate": 5e-05, "loss": 0.2404, "step": 452 }, { "epoch": 0.14144094918429476, "grad_norm": 0.6981039047241211, "learning_rate": 5e-05, "loss": 0.2403, "step": 453 }, { "epoch": 0.14175318086019828, "grad_norm": 0.6603981852531433, "learning_rate": 5e-05, "loss": 0.2154, "step": 454 }, { "epoch": 0.14206541253610178, "grad_norm": 0.7295742630958557, "learning_rate": 5e-05, "loss": 0.2458, "step": 455 }, { "epoch": 0.1423776442120053, "grad_norm": 0.670513927936554, "learning_rate": 5e-05, "loss": 0.228, "step": 456 }, { "epoch": 0.14268987588790882, "grad_norm": 0.6766762733459473, "learning_rate": 5e-05, "loss": 0.2231, "step": 457 }, { "epoch": 0.14300210756381235, "grad_norm": 0.6933634877204895, "learning_rate": 5e-05, "loss": 0.2393, "step": 458 }, { "epoch": 0.14331433923971587, "grad_norm": 0.7452069520950317, "learning_rate": 5e-05, "loss": 0.2264, "step": 459 }, { "epoch": 0.1436265709156194, "grad_norm": 0.7351062297821045, "learning_rate": 5e-05, "loss": 0.2408, "step": 460 }, { "epoch": 0.14393880259152292, "grad_norm": 0.702573835849762, "learning_rate": 5e-05, "loss": 0.2421, "step": 461 }, { "epoch": 0.14425103426742644, "grad_norm": 0.7342304587364197, "learning_rate": 5e-05, "loss": 0.2402, "step": 462 }, { "epoch": 0.14456326594332994, "grad_norm": 0.7064409852027893, "learning_rate": 5e-05, "loss": 0.2404, "step": 463 }, { "epoch": 0.14487549761923346, "grad_norm": 0.7448993921279907, "learning_rate": 5e-05, "loss": 0.24, "step": 464 }, { "epoch": 0.14518772929513699, "grad_norm": 0.7195809483528137, "learning_rate": 5e-05, "loss": 0.2108, "step": 465 }, { "epoch": 0.1454999609710405, "grad_norm": 0.7377538084983826, "learning_rate": 5e-05, "loss": 0.2497, "step": 466 }, { "epoch": 0.14581219264694403, "grad_norm": 0.7143990397453308, "learning_rate": 5e-05, "loss": 0.2346, "step": 467 }, { "epoch": 0.14612442432284756, "grad_norm": 0.719973087310791, "learning_rate": 5e-05, "loss": 0.2238, "step": 468 }, { "epoch": 0.14643665599875108, "grad_norm": 0.7181503176689148, "learning_rate": 5e-05, "loss": 0.2302, "step": 469 }, { "epoch": 0.1467488876746546, "grad_norm": 0.7810243964195251, "learning_rate": 5e-05, "loss": 0.2608, "step": 470 }, { "epoch": 0.1470611193505581, "grad_norm": 0.7173299193382263, "learning_rate": 5e-05, "loss": 0.2416, "step": 471 }, { "epoch": 0.14737335102646162, "grad_norm": 0.6894776821136475, "learning_rate": 5e-05, "loss": 0.2256, "step": 472 }, { "epoch": 0.14768558270236515, "grad_norm": 0.6694115400314331, "learning_rate": 5e-05, "loss": 0.2182, "step": 473 }, { "epoch": 0.14799781437826867, "grad_norm": 0.708406388759613, "learning_rate": 5e-05, "loss": 0.2378, "step": 474 }, { "epoch": 0.1483100460541722, "grad_norm": 0.6774315237998962, "learning_rate": 5e-05, "loss": 0.2299, "step": 475 }, { "epoch": 0.14862227773007572, "grad_norm": 0.6986003518104553, "learning_rate": 5e-05, "loss": 0.2531, "step": 476 }, { "epoch": 0.14893450940597924, "grad_norm": 0.6678252220153809, "learning_rate": 5e-05, "loss": 0.2297, "step": 477 }, { "epoch": 0.14924674108188277, "grad_norm": 0.6832705736160278, "learning_rate": 5e-05, "loss": 0.2293, "step": 478 }, { "epoch": 0.1495589727577863, "grad_norm": 0.7063206434249878, "learning_rate": 5e-05, "loss": 0.2511, "step": 479 }, { "epoch": 0.1498712044336898, "grad_norm": 0.7123210430145264, "learning_rate": 5e-05, "loss": 0.2468, "step": 480 }, { "epoch": 0.1501834361095933, "grad_norm": 0.7311363220214844, "learning_rate": 5e-05, "loss": 0.2212, "step": 481 }, { "epoch": 0.15049566778549683, "grad_norm": 0.6783101558685303, "learning_rate": 5e-05, "loss": 0.2288, "step": 482 }, { "epoch": 0.15080789946140036, "grad_norm": 0.6952267289161682, "learning_rate": 5e-05, "loss": 0.2289, "step": 483 }, { "epoch": 0.15112013113730388, "grad_norm": 0.7377178072929382, "learning_rate": 5e-05, "loss": 0.2475, "step": 484 }, { "epoch": 0.1514323628132074, "grad_norm": 0.7447988390922546, "learning_rate": 5e-05, "loss": 0.2279, "step": 485 }, { "epoch": 0.15174459448911093, "grad_norm": 0.7274515628814697, "learning_rate": 5e-05, "loss": 0.2488, "step": 486 }, { "epoch": 0.15205682616501445, "grad_norm": 0.744928240776062, "learning_rate": 5e-05, "loss": 0.2355, "step": 487 }, { "epoch": 0.15236905784091795, "grad_norm": 0.7154725790023804, "learning_rate": 5e-05, "loss": 0.2284, "step": 488 }, { "epoch": 0.15268128951682147, "grad_norm": 0.7352346777915955, "learning_rate": 5e-05, "loss": 0.2538, "step": 489 }, { "epoch": 0.152993521192725, "grad_norm": 0.7237861156463623, "learning_rate": 5e-05, "loss": 0.243, "step": 490 }, { "epoch": 0.15330575286862852, "grad_norm": 0.7109993100166321, "learning_rate": 5e-05, "loss": 0.2245, "step": 491 }, { "epoch": 0.15361798454453204, "grad_norm": 0.6763957142829895, "learning_rate": 5e-05, "loss": 0.2156, "step": 492 }, { "epoch": 0.15393021622043557, "grad_norm": 0.7253519296646118, "learning_rate": 5e-05, "loss": 0.2524, "step": 493 }, { "epoch": 0.1542424478963391, "grad_norm": 0.6948341131210327, "learning_rate": 5e-05, "loss": 0.2246, "step": 494 }, { "epoch": 0.15455467957224261, "grad_norm": 0.6986634731292725, "learning_rate": 5e-05, "loss": 0.2291, "step": 495 }, { "epoch": 0.1548669112481461, "grad_norm": 0.715346097946167, "learning_rate": 5e-05, "loss": 0.2318, "step": 496 }, { "epoch": 0.15517914292404963, "grad_norm": 0.7285166382789612, "learning_rate": 5e-05, "loss": 0.2374, "step": 497 }, { "epoch": 0.15549137459995316, "grad_norm": 0.6709636449813843, "learning_rate": 5e-05, "loss": 0.2137, "step": 498 }, { "epoch": 0.15580360627585668, "grad_norm": 0.7208456993103027, "learning_rate": 5e-05, "loss": 0.2442, "step": 499 }, { "epoch": 0.1561158379517602, "grad_norm": 0.7396925687789917, "learning_rate": 5e-05, "loss": 0.2327, "step": 500 }, { "epoch": 0.15642806962766373, "grad_norm": 0.6835615038871765, "learning_rate": 5e-05, "loss": 0.2274, "step": 501 }, { "epoch": 0.15674030130356725, "grad_norm": 0.6971964240074158, "learning_rate": 5e-05, "loss": 0.2227, "step": 502 }, { "epoch": 0.15705253297947078, "grad_norm": 0.7199394106864929, "learning_rate": 5e-05, "loss": 0.2213, "step": 503 }, { "epoch": 0.1573647646553743, "grad_norm": 0.6605077981948853, "learning_rate": 5e-05, "loss": 0.2101, "step": 504 }, { "epoch": 0.1576769963312778, "grad_norm": 0.6660066843032837, "learning_rate": 5e-05, "loss": 0.2152, "step": 505 }, { "epoch": 0.15798922800718132, "grad_norm": 0.7061495184898376, "learning_rate": 5e-05, "loss": 0.2285, "step": 506 }, { "epoch": 0.15830145968308484, "grad_norm": 0.7248945236206055, "learning_rate": 5e-05, "loss": 0.2473, "step": 507 }, { "epoch": 0.15861369135898837, "grad_norm": 0.7076839804649353, "learning_rate": 5e-05, "loss": 0.2269, "step": 508 }, { "epoch": 0.1589259230348919, "grad_norm": 0.7106477618217468, "learning_rate": 5e-05, "loss": 0.238, "step": 509 }, { "epoch": 0.15923815471079542, "grad_norm": 0.7282993793487549, "learning_rate": 5e-05, "loss": 0.2256, "step": 510 }, { "epoch": 0.15955038638669894, "grad_norm": 0.7009677290916443, "learning_rate": 5e-05, "loss": 0.2189, "step": 511 }, { "epoch": 0.15986261806260246, "grad_norm": 0.6563753485679626, "learning_rate": 5e-05, "loss": 0.23, "step": 512 }, { "epoch": 0.16017484973850596, "grad_norm": 0.6885831952095032, "learning_rate": 5e-05, "loss": 0.2251, "step": 513 }, { "epoch": 0.16048708141440948, "grad_norm": 0.6851077079772949, "learning_rate": 5e-05, "loss": 0.2197, "step": 514 }, { "epoch": 0.160799313090313, "grad_norm": 0.7163271307945251, "learning_rate": 5e-05, "loss": 0.2088, "step": 515 }, { "epoch": 0.16111154476621653, "grad_norm": 0.6805230975151062, "learning_rate": 5e-05, "loss": 0.2321, "step": 516 }, { "epoch": 0.16142377644212005, "grad_norm": 0.7143459320068359, "learning_rate": 5e-05, "loss": 0.2302, "step": 517 }, { "epoch": 0.16173600811802358, "grad_norm": 0.7365829944610596, "learning_rate": 5e-05, "loss": 0.2478, "step": 518 }, { "epoch": 0.1620482397939271, "grad_norm": 0.7131930589675903, "learning_rate": 5e-05, "loss": 0.2438, "step": 519 }, { "epoch": 0.16236047146983063, "grad_norm": 0.6971017718315125, "learning_rate": 5e-05, "loss": 0.241, "step": 520 }, { "epoch": 0.16267270314573412, "grad_norm": 0.7018243074417114, "learning_rate": 5e-05, "loss": 0.2193, "step": 521 }, { "epoch": 0.16298493482163764, "grad_norm": 0.6889275908470154, "learning_rate": 5e-05, "loss": 0.222, "step": 522 }, { "epoch": 0.16329716649754117, "grad_norm": 0.6834130883216858, "learning_rate": 5e-05, "loss": 0.2313, "step": 523 }, { "epoch": 0.1636093981734447, "grad_norm": 0.7008233666419983, "learning_rate": 5e-05, "loss": 0.2301, "step": 524 }, { "epoch": 0.16392162984934822, "grad_norm": 0.7295887470245361, "learning_rate": 5e-05, "loss": 0.2319, "step": 525 }, { "epoch": 0.16423386152525174, "grad_norm": 0.7019155621528625, "learning_rate": 5e-05, "loss": 0.2295, "step": 526 }, { "epoch": 0.16454609320115526, "grad_norm": 0.7173136472702026, "learning_rate": 5e-05, "loss": 0.248, "step": 527 }, { "epoch": 0.1648583248770588, "grad_norm": 0.6957157850265503, "learning_rate": 5e-05, "loss": 0.2278, "step": 528 }, { "epoch": 0.1651705565529623, "grad_norm": 0.681171715259552, "learning_rate": 5e-05, "loss": 0.2468, "step": 529 }, { "epoch": 0.1654827882288658, "grad_norm": 0.7214524149894714, "learning_rate": 5e-05, "loss": 0.2382, "step": 530 }, { "epoch": 0.16579501990476933, "grad_norm": 0.6964455246925354, "learning_rate": 5e-05, "loss": 0.2236, "step": 531 }, { "epoch": 0.16610725158067285, "grad_norm": 0.6776644587516785, "learning_rate": 5e-05, "loss": 0.216, "step": 532 }, { "epoch": 0.16641948325657638, "grad_norm": 0.6961236000061035, "learning_rate": 5e-05, "loss": 0.2318, "step": 533 }, { "epoch": 0.1667317149324799, "grad_norm": 0.7208872437477112, "learning_rate": 5e-05, "loss": 0.2493, "step": 534 }, { "epoch": 0.16704394660838343, "grad_norm": 0.7044702172279358, "learning_rate": 5e-05, "loss": 0.2139, "step": 535 }, { "epoch": 0.16735617828428695, "grad_norm": 0.7632554173469543, "learning_rate": 5e-05, "loss": 0.2388, "step": 536 }, { "epoch": 0.16766840996019047, "grad_norm": 0.7497281432151794, "learning_rate": 5e-05, "loss": 0.2357, "step": 537 }, { "epoch": 0.16798064163609397, "grad_norm": 0.7179984450340271, "learning_rate": 5e-05, "loss": 0.2286, "step": 538 }, { "epoch": 0.1682928733119975, "grad_norm": 0.7070432901382446, "learning_rate": 5e-05, "loss": 0.227, "step": 539 }, { "epoch": 0.16860510498790102, "grad_norm": 0.6971911191940308, "learning_rate": 5e-05, "loss": 0.2266, "step": 540 }, { "epoch": 0.16891733666380454, "grad_norm": 0.7330077290534973, "learning_rate": 5e-05, "loss": 0.2342, "step": 541 }, { "epoch": 0.16922956833970806, "grad_norm": 0.7007275223731995, "learning_rate": 5e-05, "loss": 0.2141, "step": 542 }, { "epoch": 0.1695418000156116, "grad_norm": 0.7161493301391602, "learning_rate": 5e-05, "loss": 0.2215, "step": 543 }, { "epoch": 0.1698540316915151, "grad_norm": 0.7144114375114441, "learning_rate": 5e-05, "loss": 0.253, "step": 544 }, { "epoch": 0.17016626336741864, "grad_norm": 0.7170636057853699, "learning_rate": 5e-05, "loss": 0.2448, "step": 545 }, { "epoch": 0.17047849504332213, "grad_norm": 0.7192121744155884, "learning_rate": 5e-05, "loss": 0.2128, "step": 546 }, { "epoch": 0.17079072671922566, "grad_norm": 0.6811600923538208, "learning_rate": 5e-05, "loss": 0.2188, "step": 547 }, { "epoch": 0.17110295839512918, "grad_norm": 0.692975640296936, "learning_rate": 5e-05, "loss": 0.2368, "step": 548 }, { "epoch": 0.1714151900710327, "grad_norm": 0.6639841198921204, "learning_rate": 5e-05, "loss": 0.2186, "step": 549 }, { "epoch": 0.17172742174693623, "grad_norm": 0.6846618056297302, "learning_rate": 5e-05, "loss": 0.2448, "step": 550 }, { "epoch": 0.17203965342283975, "grad_norm": 0.7376691699028015, "learning_rate": 5e-05, "loss": 0.2427, "step": 551 }, { "epoch": 0.17235188509874327, "grad_norm": 0.7240331768989563, "learning_rate": 5e-05, "loss": 0.2511, "step": 552 }, { "epoch": 0.1726641167746468, "grad_norm": 0.7151737213134766, "learning_rate": 5e-05, "loss": 0.2464, "step": 553 }, { "epoch": 0.17297634845055032, "grad_norm": 0.6871690154075623, "learning_rate": 5e-05, "loss": 0.2344, "step": 554 }, { "epoch": 0.17328858012645382, "grad_norm": 0.6642646193504333, "learning_rate": 5e-05, "loss": 0.2385, "step": 555 }, { "epoch": 0.17360081180235734, "grad_norm": 0.6807365417480469, "learning_rate": 5e-05, "loss": 0.22, "step": 556 }, { "epoch": 0.17391304347826086, "grad_norm": 0.7773380875587463, "learning_rate": 5e-05, "loss": 0.2539, "step": 557 }, { "epoch": 0.1742252751541644, "grad_norm": 0.7605475187301636, "learning_rate": 5e-05, "loss": 0.2564, "step": 558 }, { "epoch": 0.1745375068300679, "grad_norm": 0.6895463466644287, "learning_rate": 5e-05, "loss": 0.2245, "step": 559 }, { "epoch": 0.17484973850597144, "grad_norm": 0.6951895356178284, "learning_rate": 5e-05, "loss": 0.238, "step": 560 }, { "epoch": 0.17516197018187496, "grad_norm": 0.6947619915008545, "learning_rate": 5e-05, "loss": 0.2308, "step": 561 }, { "epoch": 0.17547420185777848, "grad_norm": 0.6888923645019531, "learning_rate": 5e-05, "loss": 0.2347, "step": 562 }, { "epoch": 0.17578643353368198, "grad_norm": 0.6804299354553223, "learning_rate": 5e-05, "loss": 0.2363, "step": 563 }, { "epoch": 0.1760986652095855, "grad_norm": 0.7033458352088928, "learning_rate": 5e-05, "loss": 0.2167, "step": 564 }, { "epoch": 0.17641089688548903, "grad_norm": 0.7298039793968201, "learning_rate": 5e-05, "loss": 0.2501, "step": 565 }, { "epoch": 0.17672312856139255, "grad_norm": 0.7062491774559021, "learning_rate": 5e-05, "loss": 0.2333, "step": 566 }, { "epoch": 0.17703536023729607, "grad_norm": 0.6574543714523315, "learning_rate": 5e-05, "loss": 0.2206, "step": 567 }, { "epoch": 0.1773475919131996, "grad_norm": 0.6745909452438354, "learning_rate": 5e-05, "loss": 0.2183, "step": 568 }, { "epoch": 0.17765982358910312, "grad_norm": 0.7153804898262024, "learning_rate": 5e-05, "loss": 0.2349, "step": 569 }, { "epoch": 0.17797205526500665, "grad_norm": 0.6866992712020874, "learning_rate": 5e-05, "loss": 0.2154, "step": 570 }, { "epoch": 0.17828428694091014, "grad_norm": 0.7246587872505188, "learning_rate": 5e-05, "loss": 0.2379, "step": 571 }, { "epoch": 0.17859651861681367, "grad_norm": 0.7108790874481201, "learning_rate": 5e-05, "loss": 0.2419, "step": 572 }, { "epoch": 0.1789087502927172, "grad_norm": 0.6966478824615479, "learning_rate": 5e-05, "loss": 0.228, "step": 573 }, { "epoch": 0.1792209819686207, "grad_norm": 0.6528505682945251, "learning_rate": 5e-05, "loss": 0.2123, "step": 574 }, { "epoch": 0.17953321364452424, "grad_norm": 0.7059011459350586, "learning_rate": 5e-05, "loss": 0.2278, "step": 575 }, { "epoch": 0.17984544532042776, "grad_norm": 0.7168601155281067, "learning_rate": 5e-05, "loss": 0.2244, "step": 576 }, { "epoch": 0.18015767699633128, "grad_norm": 0.706027626991272, "learning_rate": 5e-05, "loss": 0.2289, "step": 577 }, { "epoch": 0.1804699086722348, "grad_norm": 0.6695789694786072, "learning_rate": 5e-05, "loss": 0.2203, "step": 578 }, { "epoch": 0.18078214034813833, "grad_norm": 0.7127575874328613, "learning_rate": 5e-05, "loss": 0.2467, "step": 579 }, { "epoch": 0.18109437202404183, "grad_norm": 0.7185153365135193, "learning_rate": 5e-05, "loss": 0.2252, "step": 580 }, { "epoch": 0.18140660369994535, "grad_norm": 0.6907889246940613, "learning_rate": 5e-05, "loss": 0.2337, "step": 581 }, { "epoch": 0.18171883537584888, "grad_norm": 0.6773670315742493, "learning_rate": 5e-05, "loss": 0.2177, "step": 582 }, { "epoch": 0.1820310670517524, "grad_norm": 0.676783561706543, "learning_rate": 5e-05, "loss": 0.2201, "step": 583 }, { "epoch": 0.18234329872765592, "grad_norm": 0.7353306412696838, "learning_rate": 5e-05, "loss": 0.2435, "step": 584 }, { "epoch": 0.18265553040355945, "grad_norm": 0.7334724068641663, "learning_rate": 5e-05, "loss": 0.2409, "step": 585 }, { "epoch": 0.18296776207946297, "grad_norm": 0.6928703188896179, "learning_rate": 5e-05, "loss": 0.2263, "step": 586 }, { "epoch": 0.1832799937553665, "grad_norm": 0.6937906742095947, "learning_rate": 5e-05, "loss": 0.2256, "step": 587 }, { "epoch": 0.18359222543127, "grad_norm": 0.6717407703399658, "learning_rate": 5e-05, "loss": 0.2239, "step": 588 }, { "epoch": 0.1839044571071735, "grad_norm": 0.6520743370056152, "learning_rate": 5e-05, "loss": 0.2198, "step": 589 }, { "epoch": 0.18421668878307704, "grad_norm": 0.6969377398490906, "learning_rate": 5e-05, "loss": 0.2212, "step": 590 }, { "epoch": 0.18452892045898056, "grad_norm": 0.691193699836731, "learning_rate": 5e-05, "loss": 0.2409, "step": 591 }, { "epoch": 0.18484115213488408, "grad_norm": 0.7161620259284973, "learning_rate": 5e-05, "loss": 0.2204, "step": 592 }, { "epoch": 0.1851533838107876, "grad_norm": 0.7171253561973572, "learning_rate": 5e-05, "loss": 0.2361, "step": 593 }, { "epoch": 0.18546561548669113, "grad_norm": 0.6756821870803833, "learning_rate": 5e-05, "loss": 0.2184, "step": 594 }, { "epoch": 0.18577784716259466, "grad_norm": 0.6939188241958618, "learning_rate": 5e-05, "loss": 0.2357, "step": 595 }, { "epoch": 0.18609007883849815, "grad_norm": 0.6563978791236877, "learning_rate": 5e-05, "loss": 0.2262, "step": 596 }, { "epoch": 0.18640231051440168, "grad_norm": 0.6993412971496582, "learning_rate": 5e-05, "loss": 0.2349, "step": 597 }, { "epoch": 0.1867145421903052, "grad_norm": 0.6617957353591919, "learning_rate": 5e-05, "loss": 0.2252, "step": 598 }, { "epoch": 0.18702677386620872, "grad_norm": 0.7053597569465637, "learning_rate": 5e-05, "loss": 0.2355, "step": 599 }, { "epoch": 0.18733900554211225, "grad_norm": 0.6998315453529358, "learning_rate": 5e-05, "loss": 0.2482, "step": 600 }, { "epoch": 0.18765123721801577, "grad_norm": 0.6833526492118835, "learning_rate": 5e-05, "loss": 0.2179, "step": 601 }, { "epoch": 0.1879634688939193, "grad_norm": 0.6405429840087891, "learning_rate": 5e-05, "loss": 0.2117, "step": 602 }, { "epoch": 0.18827570056982282, "grad_norm": 0.6799123287200928, "learning_rate": 5e-05, "loss": 0.233, "step": 603 }, { "epoch": 0.18858793224572634, "grad_norm": 0.6986581087112427, "learning_rate": 5e-05, "loss": 0.2391, "step": 604 }, { "epoch": 0.18890016392162984, "grad_norm": 0.6789010167121887, "learning_rate": 5e-05, "loss": 0.2187, "step": 605 }, { "epoch": 0.18921239559753336, "grad_norm": 0.6905085444450378, "learning_rate": 5e-05, "loss": 0.2245, "step": 606 }, { "epoch": 0.18952462727343689, "grad_norm": 0.6379793286323547, "learning_rate": 5e-05, "loss": 0.2102, "step": 607 }, { "epoch": 0.1898368589493404, "grad_norm": 0.7339112758636475, "learning_rate": 5e-05, "loss": 0.2497, "step": 608 }, { "epoch": 0.19014909062524393, "grad_norm": 0.6893284320831299, "learning_rate": 5e-05, "loss": 0.2283, "step": 609 }, { "epoch": 0.19046132230114746, "grad_norm": 0.6544316411018372, "learning_rate": 5e-05, "loss": 0.2125, "step": 610 }, { "epoch": 0.19077355397705098, "grad_norm": 0.6493269801139832, "learning_rate": 5e-05, "loss": 0.2198, "step": 611 }, { "epoch": 0.1910857856529545, "grad_norm": 0.7032558917999268, "learning_rate": 5e-05, "loss": 0.2208, "step": 612 }, { "epoch": 0.191398017328858, "grad_norm": 0.7296326756477356, "learning_rate": 5e-05, "loss": 0.2523, "step": 613 }, { "epoch": 0.19171024900476152, "grad_norm": 0.7196498513221741, "learning_rate": 5e-05, "loss": 0.241, "step": 614 }, { "epoch": 0.19202248068066505, "grad_norm": 0.7121750116348267, "learning_rate": 5e-05, "loss": 0.2356, "step": 615 }, { "epoch": 0.19233471235656857, "grad_norm": 0.6426519751548767, "learning_rate": 5e-05, "loss": 0.2144, "step": 616 }, { "epoch": 0.1926469440324721, "grad_norm": 0.6986357569694519, "learning_rate": 5e-05, "loss": 0.228, "step": 617 }, { "epoch": 0.19295917570837562, "grad_norm": 0.7052671313285828, "learning_rate": 5e-05, "loss": 0.2437, "step": 618 }, { "epoch": 0.19327140738427914, "grad_norm": 0.6406875252723694, "learning_rate": 5e-05, "loss": 0.2114, "step": 619 }, { "epoch": 0.19358363906018267, "grad_norm": 0.7373706698417664, "learning_rate": 5e-05, "loss": 0.2466, "step": 620 }, { "epoch": 0.19389587073608616, "grad_norm": 0.8028583526611328, "learning_rate": 5e-05, "loss": 0.2618, "step": 621 }, { "epoch": 0.19420810241198969, "grad_norm": 0.6870984435081482, "learning_rate": 5e-05, "loss": 0.2261, "step": 622 }, { "epoch": 0.1945203340878932, "grad_norm": 0.6881847381591797, "learning_rate": 5e-05, "loss": 0.2153, "step": 623 }, { "epoch": 0.19483256576379673, "grad_norm": 0.6881365776062012, "learning_rate": 5e-05, "loss": 0.2178, "step": 624 }, { "epoch": 0.19514479743970026, "grad_norm": 0.7200818657875061, "learning_rate": 5e-05, "loss": 0.2339, "step": 625 }, { "epoch": 0.19545702911560378, "grad_norm": 0.6727520227432251, "learning_rate": 5e-05, "loss": 0.2365, "step": 626 }, { "epoch": 0.1957692607915073, "grad_norm": 0.663469135761261, "learning_rate": 5e-05, "loss": 0.2336, "step": 627 }, { "epoch": 0.19608149246741083, "grad_norm": 0.6899528503417969, "learning_rate": 5e-05, "loss": 0.2312, "step": 628 }, { "epoch": 0.19639372414331435, "grad_norm": 0.6767566204071045, "learning_rate": 5e-05, "loss": 0.2386, "step": 629 }, { "epoch": 0.19670595581921785, "grad_norm": 0.6568947434425354, "learning_rate": 5e-05, "loss": 0.2245, "step": 630 }, { "epoch": 0.19701818749512137, "grad_norm": 0.6842156052589417, "learning_rate": 5e-05, "loss": 0.2143, "step": 631 }, { "epoch": 0.1973304191710249, "grad_norm": 0.7149919867515564, "learning_rate": 5e-05, "loss": 0.2381, "step": 632 }, { "epoch": 0.19764265084692842, "grad_norm": 0.6814210414886475, "learning_rate": 5e-05, "loss": 0.2406, "step": 633 }, { "epoch": 0.19795488252283194, "grad_norm": 0.68083655834198, "learning_rate": 5e-05, "loss": 0.2393, "step": 634 }, { "epoch": 0.19826711419873547, "grad_norm": 0.6812088489532471, "learning_rate": 5e-05, "loss": 0.2251, "step": 635 }, { "epoch": 0.198579345874639, "grad_norm": 0.6789886355400085, "learning_rate": 5e-05, "loss": 0.2275, "step": 636 }, { "epoch": 0.19889157755054251, "grad_norm": 0.7249051332473755, "learning_rate": 5e-05, "loss": 0.2167, "step": 637 }, { "epoch": 0.199203809226446, "grad_norm": 0.7049183249473572, "learning_rate": 5e-05, "loss": 0.2322, "step": 638 }, { "epoch": 0.19951604090234953, "grad_norm": 0.6876125335693359, "learning_rate": 5e-05, "loss": 0.2126, "step": 639 }, { "epoch": 0.19982827257825306, "grad_norm": 0.7145660519599915, "learning_rate": 5e-05, "loss": 0.2222, "step": 640 }, { "epoch": 0.20014050425415658, "grad_norm": 0.6964045166969299, "learning_rate": 5e-05, "loss": 0.2253, "step": 641 }, { "epoch": 0.2004527359300601, "grad_norm": 0.736315906047821, "learning_rate": 5e-05, "loss": 0.243, "step": 642 }, { "epoch": 0.20076496760596363, "grad_norm": 0.7128903865814209, "learning_rate": 5e-05, "loss": 0.2253, "step": 643 }, { "epoch": 0.20107719928186715, "grad_norm": 0.7163657546043396, "learning_rate": 5e-05, "loss": 0.2362, "step": 644 }, { "epoch": 0.20138943095777068, "grad_norm": 0.6780276298522949, "learning_rate": 5e-05, "loss": 0.2388, "step": 645 }, { "epoch": 0.20170166263367417, "grad_norm": 0.6742351055145264, "learning_rate": 5e-05, "loss": 0.2339, "step": 646 }, { "epoch": 0.2020138943095777, "grad_norm": 0.729037880897522, "learning_rate": 5e-05, "loss": 0.2427, "step": 647 }, { "epoch": 0.20232612598548122, "grad_norm": 0.6943097114562988, "learning_rate": 5e-05, "loss": 0.2473, "step": 648 }, { "epoch": 0.20263835766138474, "grad_norm": 0.6844215989112854, "learning_rate": 5e-05, "loss": 0.2268, "step": 649 }, { "epoch": 0.20295058933728827, "grad_norm": 0.7399567365646362, "learning_rate": 5e-05, "loss": 0.232, "step": 650 }, { "epoch": 0.2032628210131918, "grad_norm": 0.7132176160812378, "learning_rate": 5e-05, "loss": 0.2385, "step": 651 }, { "epoch": 0.20357505268909531, "grad_norm": 0.6454074382781982, "learning_rate": 5e-05, "loss": 0.2191, "step": 652 }, { "epoch": 0.20388728436499884, "grad_norm": 0.6959068775177002, "learning_rate": 5e-05, "loss": 0.2217, "step": 653 }, { "epoch": 0.20419951604090236, "grad_norm": 0.6506040096282959, "learning_rate": 5e-05, "loss": 0.2167, "step": 654 }, { "epoch": 0.20451174771680586, "grad_norm": 0.7000797390937805, "learning_rate": 5e-05, "loss": 0.2248, "step": 655 }, { "epoch": 0.20482397939270938, "grad_norm": 0.7238790392875671, "learning_rate": 5e-05, "loss": 0.2178, "step": 656 }, { "epoch": 0.2051362110686129, "grad_norm": 0.6803821325302124, "learning_rate": 5e-05, "loss": 0.2375, "step": 657 }, { "epoch": 0.20544844274451643, "grad_norm": 0.6739383339881897, "learning_rate": 5e-05, "loss": 0.2457, "step": 658 }, { "epoch": 0.20576067442041995, "grad_norm": 0.6633620262145996, "learning_rate": 5e-05, "loss": 0.2227, "step": 659 }, { "epoch": 0.20607290609632348, "grad_norm": 0.6907643675804138, "learning_rate": 5e-05, "loss": 0.2367, "step": 660 }, { "epoch": 0.206385137772227, "grad_norm": 0.7065349817276001, "learning_rate": 5e-05, "loss": 0.2267, "step": 661 }, { "epoch": 0.20669736944813052, "grad_norm": 0.7268231511116028, "learning_rate": 5e-05, "loss": 0.2267, "step": 662 }, { "epoch": 0.20700960112403402, "grad_norm": 0.7222045063972473, "learning_rate": 5e-05, "loss": 0.2424, "step": 663 }, { "epoch": 0.20732183279993754, "grad_norm": 0.7273341417312622, "learning_rate": 5e-05, "loss": 0.2362, "step": 664 }, { "epoch": 0.20763406447584107, "grad_norm": 0.6670531034469604, "learning_rate": 5e-05, "loss": 0.2141, "step": 665 }, { "epoch": 0.2079462961517446, "grad_norm": 0.6629462242126465, "learning_rate": 5e-05, "loss": 0.2284, "step": 666 }, { "epoch": 0.20825852782764812, "grad_norm": 0.6592385768890381, "learning_rate": 5e-05, "loss": 0.2179, "step": 667 }, { "epoch": 0.20857075950355164, "grad_norm": 0.6721193790435791, "learning_rate": 5e-05, "loss": 0.2309, "step": 668 }, { "epoch": 0.20888299117945516, "grad_norm": 0.682852029800415, "learning_rate": 5e-05, "loss": 0.2291, "step": 669 }, { "epoch": 0.2091952228553587, "grad_norm": 0.675503134727478, "learning_rate": 5e-05, "loss": 0.2324, "step": 670 }, { "epoch": 0.20950745453126218, "grad_norm": 0.6940597891807556, "learning_rate": 5e-05, "loss": 0.2408, "step": 671 }, { "epoch": 0.2098196862071657, "grad_norm": 0.7174805402755737, "learning_rate": 5e-05, "loss": 0.2287, "step": 672 }, { "epoch": 0.21013191788306923, "grad_norm": 0.6940304636955261, "learning_rate": 5e-05, "loss": 0.237, "step": 673 }, { "epoch": 0.21044414955897275, "grad_norm": 0.6758440136909485, "learning_rate": 5e-05, "loss": 0.2171, "step": 674 }, { "epoch": 0.21075638123487628, "grad_norm": 0.6556110382080078, "learning_rate": 5e-05, "loss": 0.221, "step": 675 }, { "epoch": 0.2110686129107798, "grad_norm": 0.6564118266105652, "learning_rate": 5e-05, "loss": 0.2264, "step": 676 }, { "epoch": 0.21138084458668333, "grad_norm": 0.6869651079177856, "learning_rate": 5e-05, "loss": 0.2373, "step": 677 }, { "epoch": 0.21169307626258685, "grad_norm": 0.7056733965873718, "learning_rate": 5e-05, "loss": 0.2271, "step": 678 }, { "epoch": 0.21200530793849037, "grad_norm": 0.6117857694625854, "learning_rate": 5e-05, "loss": 0.216, "step": 679 }, { "epoch": 0.21231753961439387, "grad_norm": 0.7038550972938538, "learning_rate": 5e-05, "loss": 0.2335, "step": 680 }, { "epoch": 0.2126297712902974, "grad_norm": 0.7281777262687683, "learning_rate": 5e-05, "loss": 0.2696, "step": 681 }, { "epoch": 0.21294200296620092, "grad_norm": 0.6594808101654053, "learning_rate": 5e-05, "loss": 0.2312, "step": 682 }, { "epoch": 0.21325423464210444, "grad_norm": 0.6638525724411011, "learning_rate": 5e-05, "loss": 0.2185, "step": 683 }, { "epoch": 0.21356646631800796, "grad_norm": 0.7125054001808167, "learning_rate": 5e-05, "loss": 0.2175, "step": 684 }, { "epoch": 0.2138786979939115, "grad_norm": 0.7083826661109924, "learning_rate": 5e-05, "loss": 0.2283, "step": 685 }, { "epoch": 0.214190929669815, "grad_norm": 0.680549681186676, "learning_rate": 5e-05, "loss": 0.2336, "step": 686 }, { "epoch": 0.21450316134571853, "grad_norm": 0.7232916355133057, "learning_rate": 5e-05, "loss": 0.2352, "step": 687 }, { "epoch": 0.21481539302162203, "grad_norm": 0.7509969472885132, "learning_rate": 5e-05, "loss": 0.2418, "step": 688 }, { "epoch": 0.21512762469752555, "grad_norm": 0.7110801935195923, "learning_rate": 5e-05, "loss": 0.246, "step": 689 }, { "epoch": 0.21543985637342908, "grad_norm": 0.6922047138214111, "learning_rate": 5e-05, "loss": 0.2367, "step": 690 }, { "epoch": 0.2157520880493326, "grad_norm": 0.6800710558891296, "learning_rate": 5e-05, "loss": 0.2318, "step": 691 }, { "epoch": 0.21606431972523613, "grad_norm": 0.6881672739982605, "learning_rate": 5e-05, "loss": 0.2313, "step": 692 }, { "epoch": 0.21637655140113965, "grad_norm": 0.6749621629714966, "learning_rate": 5e-05, "loss": 0.2298, "step": 693 }, { "epoch": 0.21668878307704317, "grad_norm": 0.6966788172721863, "learning_rate": 5e-05, "loss": 0.2268, "step": 694 }, { "epoch": 0.2170010147529467, "grad_norm": 0.7230420708656311, "learning_rate": 5e-05, "loss": 0.2582, "step": 695 }, { "epoch": 0.2173132464288502, "grad_norm": 0.6623227596282959, "learning_rate": 5e-05, "loss": 0.2223, "step": 696 }, { "epoch": 0.21762547810475372, "grad_norm": 0.6454969644546509, "learning_rate": 5e-05, "loss": 0.2033, "step": 697 }, { "epoch": 0.21793770978065724, "grad_norm": 0.6895531415939331, "learning_rate": 5e-05, "loss": 0.2303, "step": 698 }, { "epoch": 0.21824994145656076, "grad_norm": 0.6916325688362122, "learning_rate": 5e-05, "loss": 0.237, "step": 699 }, { "epoch": 0.2185621731324643, "grad_norm": 0.6802225708961487, "learning_rate": 5e-05, "loss": 0.2263, "step": 700 }, { "epoch": 0.2188744048083678, "grad_norm": 0.6971715688705444, "learning_rate": 5e-05, "loss": 0.244, "step": 701 }, { "epoch": 0.21918663648427134, "grad_norm": 0.6829821467399597, "learning_rate": 5e-05, "loss": 0.2314, "step": 702 }, { "epoch": 0.21949886816017486, "grad_norm": 0.6710850596427917, "learning_rate": 5e-05, "loss": 0.238, "step": 703 }, { "epoch": 0.21981109983607838, "grad_norm": 0.6740974187850952, "learning_rate": 5e-05, "loss": 0.2237, "step": 704 }, { "epoch": 0.22012333151198188, "grad_norm": 0.6301434636116028, "learning_rate": 5e-05, "loss": 0.2063, "step": 705 }, { "epoch": 0.2204355631878854, "grad_norm": 0.706469714641571, "learning_rate": 5e-05, "loss": 0.2345, "step": 706 }, { "epoch": 0.22074779486378893, "grad_norm": 0.6835813522338867, "learning_rate": 5e-05, "loss": 0.2427, "step": 707 }, { "epoch": 0.22106002653969245, "grad_norm": 0.6846723556518555, "learning_rate": 5e-05, "loss": 0.2433, "step": 708 }, { "epoch": 0.22137225821559597, "grad_norm": 0.6599858999252319, "learning_rate": 5e-05, "loss": 0.2188, "step": 709 }, { "epoch": 0.2216844898914995, "grad_norm": 0.6640614867210388, "learning_rate": 5e-05, "loss": 0.231, "step": 710 }, { "epoch": 0.22199672156740302, "grad_norm": 0.6669655442237854, "learning_rate": 5e-05, "loss": 0.2217, "step": 711 }, { "epoch": 0.22230895324330655, "grad_norm": 0.6607507467269897, "learning_rate": 5e-05, "loss": 0.2205, "step": 712 }, { "epoch": 0.22262118491921004, "grad_norm": 0.6922304034233093, "learning_rate": 5e-05, "loss": 0.2385, "step": 713 }, { "epoch": 0.22293341659511356, "grad_norm": 0.657249391078949, "learning_rate": 5e-05, "loss": 0.225, "step": 714 }, { "epoch": 0.2232456482710171, "grad_norm": 0.6998234391212463, "learning_rate": 5e-05, "loss": 0.231, "step": 715 }, { "epoch": 0.2235578799469206, "grad_norm": 0.6592785120010376, "learning_rate": 5e-05, "loss": 0.2198, "step": 716 }, { "epoch": 0.22387011162282414, "grad_norm": 0.6711361408233643, "learning_rate": 5e-05, "loss": 0.2326, "step": 717 }, { "epoch": 0.22418234329872766, "grad_norm": 0.6739751696586609, "learning_rate": 5e-05, "loss": 0.2291, "step": 718 }, { "epoch": 0.22449457497463118, "grad_norm": 0.6652281284332275, "learning_rate": 5e-05, "loss": 0.2266, "step": 719 }, { "epoch": 0.2248068066505347, "grad_norm": 0.6545221209526062, "learning_rate": 5e-05, "loss": 0.2135, "step": 720 }, { "epoch": 0.2251190383264382, "grad_norm": 0.6609752178192139, "learning_rate": 5e-05, "loss": 0.2315, "step": 721 }, { "epoch": 0.22543127000234173, "grad_norm": 0.7145570516586304, "learning_rate": 5e-05, "loss": 0.2453, "step": 722 }, { "epoch": 0.22574350167824525, "grad_norm": 0.6571034789085388, "learning_rate": 5e-05, "loss": 0.2168, "step": 723 }, { "epoch": 0.22605573335414877, "grad_norm": 0.6730474829673767, "learning_rate": 5e-05, "loss": 0.2287, "step": 724 }, { "epoch": 0.2263679650300523, "grad_norm": 0.7271101474761963, "learning_rate": 5e-05, "loss": 0.2401, "step": 725 }, { "epoch": 0.22668019670595582, "grad_norm": 0.6616913676261902, "learning_rate": 5e-05, "loss": 0.226, "step": 726 }, { "epoch": 0.22699242838185935, "grad_norm": 0.6630445122718811, "learning_rate": 5e-05, "loss": 0.2367, "step": 727 }, { "epoch": 0.22730466005776287, "grad_norm": 0.6623149514198303, "learning_rate": 5e-05, "loss": 0.2126, "step": 728 }, { "epoch": 0.2276168917336664, "grad_norm": 0.6539262533187866, "learning_rate": 5e-05, "loss": 0.2276, "step": 729 }, { "epoch": 0.2279291234095699, "grad_norm": 0.6249886751174927, "learning_rate": 5e-05, "loss": 0.1887, "step": 730 }, { "epoch": 0.2282413550854734, "grad_norm": 0.6590133309364319, "learning_rate": 5e-05, "loss": 0.2226, "step": 731 }, { "epoch": 0.22855358676137694, "grad_norm": 0.6780053377151489, "learning_rate": 5e-05, "loss": 0.2275, "step": 732 }, { "epoch": 0.22886581843728046, "grad_norm": 0.7044374346733093, "learning_rate": 5e-05, "loss": 0.243, "step": 733 }, { "epoch": 0.22917805011318398, "grad_norm": 0.6753026247024536, "learning_rate": 5e-05, "loss": 0.2109, "step": 734 }, { "epoch": 0.2294902817890875, "grad_norm": 0.6668133735656738, "learning_rate": 5e-05, "loss": 0.2147, "step": 735 }, { "epoch": 0.22980251346499103, "grad_norm": 0.7003597617149353, "learning_rate": 5e-05, "loss": 0.2313, "step": 736 }, { "epoch": 0.23011474514089456, "grad_norm": 0.6755712032318115, "learning_rate": 5e-05, "loss": 0.2312, "step": 737 }, { "epoch": 0.23042697681679805, "grad_norm": 0.6959272027015686, "learning_rate": 5e-05, "loss": 0.2333, "step": 738 }, { "epoch": 0.23073920849270158, "grad_norm": 0.7109386324882507, "learning_rate": 5e-05, "loss": 0.2552, "step": 739 }, { "epoch": 0.2310514401686051, "grad_norm": 0.7103399038314819, "learning_rate": 5e-05, "loss": 0.2433, "step": 740 }, { "epoch": 0.23136367184450862, "grad_norm": 0.7299090027809143, "learning_rate": 5e-05, "loss": 0.2522, "step": 741 }, { "epoch": 0.23167590352041215, "grad_norm": 0.6764361262321472, "learning_rate": 5e-05, "loss": 0.2396, "step": 742 }, { "epoch": 0.23198813519631567, "grad_norm": 0.6661965847015381, "learning_rate": 5e-05, "loss": 0.2332, "step": 743 }, { "epoch": 0.2323003668722192, "grad_norm": 0.6933612823486328, "learning_rate": 5e-05, "loss": 0.2575, "step": 744 }, { "epoch": 0.23261259854812272, "grad_norm": 0.7030240297317505, "learning_rate": 5e-05, "loss": 0.2479, "step": 745 }, { "epoch": 0.2329248302240262, "grad_norm": 0.6387937664985657, "learning_rate": 5e-05, "loss": 0.2182, "step": 746 }, { "epoch": 0.23323706189992974, "grad_norm": 0.6660206317901611, "learning_rate": 5e-05, "loss": 0.2266, "step": 747 }, { "epoch": 0.23354929357583326, "grad_norm": 0.6760786175727844, "learning_rate": 5e-05, "loss": 0.2375, "step": 748 }, { "epoch": 0.23386152525173678, "grad_norm": 0.6734618544578552, "learning_rate": 5e-05, "loss": 0.2429, "step": 749 }, { "epoch": 0.2341737569276403, "grad_norm": 0.6593843698501587, "learning_rate": 5e-05, "loss": 0.2158, "step": 750 }, { "epoch": 0.23448598860354383, "grad_norm": 0.6673220992088318, "learning_rate": 5e-05, "loss": 0.2311, "step": 751 }, { "epoch": 0.23479822027944736, "grad_norm": 0.7129413485527039, "learning_rate": 5e-05, "loss": 0.2316, "step": 752 }, { "epoch": 0.23511045195535088, "grad_norm": 0.7068873047828674, "learning_rate": 5e-05, "loss": 0.2375, "step": 753 }, { "epoch": 0.2354226836312544, "grad_norm": 0.6366264224052429, "learning_rate": 5e-05, "loss": 0.222, "step": 754 }, { "epoch": 0.2357349153071579, "grad_norm": 0.7189668416976929, "learning_rate": 5e-05, "loss": 0.2332, "step": 755 }, { "epoch": 0.23604714698306142, "grad_norm": 0.6474116444587708, "learning_rate": 5e-05, "loss": 0.2153, "step": 756 }, { "epoch": 0.23635937865896495, "grad_norm": 0.7125443816184998, "learning_rate": 5e-05, "loss": 0.235, "step": 757 }, { "epoch": 0.23667161033486847, "grad_norm": 0.6866033673286438, "learning_rate": 5e-05, "loss": 0.2228, "step": 758 }, { "epoch": 0.236983842010772, "grad_norm": 0.6689456701278687, "learning_rate": 5e-05, "loss": 0.2152, "step": 759 }, { "epoch": 0.23729607368667552, "grad_norm": 0.6996487975120544, "learning_rate": 5e-05, "loss": 0.2196, "step": 760 }, { "epoch": 0.23760830536257904, "grad_norm": 0.7293421030044556, "learning_rate": 5e-05, "loss": 0.244, "step": 761 }, { "epoch": 0.23792053703848257, "grad_norm": 0.6937530040740967, "learning_rate": 5e-05, "loss": 0.2325, "step": 762 }, { "epoch": 0.23823276871438606, "grad_norm": 0.7033172249794006, "learning_rate": 5e-05, "loss": 0.2217, "step": 763 }, { "epoch": 0.23854500039028959, "grad_norm": 0.7637369632720947, "learning_rate": 5e-05, "loss": 0.2693, "step": 764 }, { "epoch": 0.2388572320661931, "grad_norm": 0.6838207244873047, "learning_rate": 5e-05, "loss": 0.2455, "step": 765 }, { "epoch": 0.23916946374209663, "grad_norm": 0.7388439178466797, "learning_rate": 5e-05, "loss": 0.2402, "step": 766 }, { "epoch": 0.23948169541800016, "grad_norm": 0.6721513867378235, "learning_rate": 5e-05, "loss": 0.2183, "step": 767 }, { "epoch": 0.23979392709390368, "grad_norm": 0.6302772164344788, "learning_rate": 5e-05, "loss": 0.2226, "step": 768 }, { "epoch": 0.2401061587698072, "grad_norm": 0.6611379981040955, "learning_rate": 5e-05, "loss": 0.2238, "step": 769 }, { "epoch": 0.24041839044571073, "grad_norm": 0.668657660484314, "learning_rate": 5e-05, "loss": 0.2329, "step": 770 }, { "epoch": 0.24073062212161422, "grad_norm": 0.6928439736366272, "learning_rate": 5e-05, "loss": 0.2201, "step": 771 }, { "epoch": 0.24104285379751775, "grad_norm": 0.6479029655456543, "learning_rate": 5e-05, "loss": 0.2235, "step": 772 }, { "epoch": 0.24135508547342127, "grad_norm": 0.7166841626167297, "learning_rate": 5e-05, "loss": 0.2337, "step": 773 }, { "epoch": 0.2416673171493248, "grad_norm": 0.6603056192398071, "learning_rate": 5e-05, "loss": 0.2164, "step": 774 }, { "epoch": 0.24197954882522832, "grad_norm": 0.7197387218475342, "learning_rate": 5e-05, "loss": 0.2402, "step": 775 }, { "epoch": 0.24229178050113184, "grad_norm": 0.6881314516067505, "learning_rate": 5e-05, "loss": 0.2472, "step": 776 }, { "epoch": 0.24260401217703537, "grad_norm": 0.7526457905769348, "learning_rate": 5e-05, "loss": 0.2289, "step": 777 }, { "epoch": 0.2429162438529389, "grad_norm": 0.676089882850647, "learning_rate": 5e-05, "loss": 0.2259, "step": 778 }, { "epoch": 0.2432284755288424, "grad_norm": 0.6193670034408569, "learning_rate": 5e-05, "loss": 0.2073, "step": 779 }, { "epoch": 0.2435407072047459, "grad_norm": 0.649749755859375, "learning_rate": 5e-05, "loss": 0.2255, "step": 780 }, { "epoch": 0.24385293888064943, "grad_norm": 0.6677228212356567, "learning_rate": 5e-05, "loss": 0.245, "step": 781 }, { "epoch": 0.24416517055655296, "grad_norm": 0.6633369326591492, "learning_rate": 5e-05, "loss": 0.2332, "step": 782 }, { "epoch": 0.24447740223245648, "grad_norm": 0.6184995770454407, "learning_rate": 5e-05, "loss": 0.2157, "step": 783 }, { "epoch": 0.24478963390836, "grad_norm": 0.6678171753883362, "learning_rate": 5e-05, "loss": 0.2345, "step": 784 }, { "epoch": 0.24510186558426353, "grad_norm": 0.7446092367172241, "learning_rate": 5e-05, "loss": 0.233, "step": 785 }, { "epoch": 0.24541409726016705, "grad_norm": 0.6619665622711182, "learning_rate": 5e-05, "loss": 0.2225, "step": 786 }, { "epoch": 0.24572632893607058, "grad_norm": 0.640379786491394, "learning_rate": 5e-05, "loss": 0.2318, "step": 787 }, { "epoch": 0.24603856061197407, "grad_norm": 0.675929605960846, "learning_rate": 5e-05, "loss": 0.2247, "step": 788 }, { "epoch": 0.2463507922878776, "grad_norm": 0.6656391024589539, "learning_rate": 5e-05, "loss": 0.2319, "step": 789 }, { "epoch": 0.24666302396378112, "grad_norm": 0.6634158492088318, "learning_rate": 5e-05, "loss": 0.2335, "step": 790 }, { "epoch": 0.24697525563968464, "grad_norm": 0.6912989616394043, "learning_rate": 5e-05, "loss": 0.246, "step": 791 }, { "epoch": 0.24728748731558817, "grad_norm": 0.632605254650116, "learning_rate": 5e-05, "loss": 0.2126, "step": 792 }, { "epoch": 0.2475997189914917, "grad_norm": 0.6408424973487854, "learning_rate": 5e-05, "loss": 0.2236, "step": 793 }, { "epoch": 0.24791195066739521, "grad_norm": 0.6518585085868835, "learning_rate": 5e-05, "loss": 0.2129, "step": 794 }, { "epoch": 0.24822418234329874, "grad_norm": 0.7141333222389221, "learning_rate": 5e-05, "loss": 0.2424, "step": 795 }, { "epoch": 0.24853641401920223, "grad_norm": 0.7099282145500183, "learning_rate": 5e-05, "loss": 0.2254, "step": 796 }, { "epoch": 0.24884864569510576, "grad_norm": 0.6868832111358643, "learning_rate": 5e-05, "loss": 0.2279, "step": 797 }, { "epoch": 0.24916087737100928, "grad_norm": 0.7012250423431396, "learning_rate": 5e-05, "loss": 0.232, "step": 798 }, { "epoch": 0.2494731090469128, "grad_norm": 0.6358281373977661, "learning_rate": 5e-05, "loss": 0.223, "step": 799 }, { "epoch": 0.24978534072281633, "grad_norm": 0.6671461462974548, "learning_rate": 5e-05, "loss": 0.2347, "step": 800 }, { "epoch": 0.2500975723987198, "grad_norm": 0.645865261554718, "learning_rate": 5e-05, "loss": 0.2222, "step": 801 }, { "epoch": 0.2504098040746234, "grad_norm": 0.6678755283355713, "learning_rate": 5e-05, "loss": 0.2264, "step": 802 }, { "epoch": 0.2507220357505269, "grad_norm": 0.6449325084686279, "learning_rate": 5e-05, "loss": 0.2264, "step": 803 }, { "epoch": 0.2510342674264304, "grad_norm": 0.6654670238494873, "learning_rate": 5e-05, "loss": 0.2425, "step": 804 }, { "epoch": 0.2513464991023339, "grad_norm": 0.6739317178726196, "learning_rate": 5e-05, "loss": 0.2276, "step": 805 }, { "epoch": 0.25165873077823747, "grad_norm": 0.6537474393844604, "learning_rate": 5e-05, "loss": 0.2349, "step": 806 }, { "epoch": 0.25197096245414097, "grad_norm": 0.6277865171432495, "learning_rate": 5e-05, "loss": 0.2185, "step": 807 }, { "epoch": 0.2522831941300445, "grad_norm": 0.6713525652885437, "learning_rate": 5e-05, "loss": 0.2188, "step": 808 }, { "epoch": 0.252595425805948, "grad_norm": 0.6952540874481201, "learning_rate": 5e-05, "loss": 0.2269, "step": 809 }, { "epoch": 0.2529076574818515, "grad_norm": 0.6702281832695007, "learning_rate": 5e-05, "loss": 0.2366, "step": 810 }, { "epoch": 0.25321988915775506, "grad_norm": 0.6731393337249756, "learning_rate": 5e-05, "loss": 0.252, "step": 811 }, { "epoch": 0.25353212083365856, "grad_norm": 0.6402536630630493, "learning_rate": 5e-05, "loss": 0.2292, "step": 812 }, { "epoch": 0.2538443525095621, "grad_norm": 0.668272078037262, "learning_rate": 5e-05, "loss": 0.2336, "step": 813 }, { "epoch": 0.2541565841854656, "grad_norm": 0.6886191368103027, "learning_rate": 5e-05, "loss": 0.2495, "step": 814 }, { "epoch": 0.25446881586136916, "grad_norm": 0.6464875340461731, "learning_rate": 5e-05, "loss": 0.212, "step": 815 }, { "epoch": 0.25478104753727265, "grad_norm": 0.67216557264328, "learning_rate": 5e-05, "loss": 0.2388, "step": 816 }, { "epoch": 0.25509327921317615, "grad_norm": 0.652521014213562, "learning_rate": 5e-05, "loss": 0.2259, "step": 817 }, { "epoch": 0.2554055108890797, "grad_norm": 0.6515074968338013, "learning_rate": 5e-05, "loss": 0.2311, "step": 818 }, { "epoch": 0.2557177425649832, "grad_norm": 0.6833532452583313, "learning_rate": 5e-05, "loss": 0.2281, "step": 819 }, { "epoch": 0.25602997424088675, "grad_norm": 0.6521165370941162, "learning_rate": 5e-05, "loss": 0.2279, "step": 820 }, { "epoch": 0.25634220591679024, "grad_norm": 0.6089149713516235, "learning_rate": 5e-05, "loss": 0.2095, "step": 821 }, { "epoch": 0.2566544375926938, "grad_norm": 0.6815756559371948, "learning_rate": 5e-05, "loss": 0.2515, "step": 822 }, { "epoch": 0.2569666692685973, "grad_norm": 0.6744572520256042, "learning_rate": 5e-05, "loss": 0.2267, "step": 823 }, { "epoch": 0.25727890094450084, "grad_norm": 0.6745449900627136, "learning_rate": 5e-05, "loss": 0.2224, "step": 824 }, { "epoch": 0.25759113262040434, "grad_norm": 0.7254633903503418, "learning_rate": 5e-05, "loss": 0.2318, "step": 825 }, { "epoch": 0.25790336429630784, "grad_norm": 0.6471327543258667, "learning_rate": 5e-05, "loss": 0.2152, "step": 826 }, { "epoch": 0.2582155959722114, "grad_norm": 0.6815661787986755, "learning_rate": 5e-05, "loss": 0.2283, "step": 827 }, { "epoch": 0.2585278276481149, "grad_norm": 0.6244885921478271, "learning_rate": 5e-05, "loss": 0.2135, "step": 828 }, { "epoch": 0.25884005932401843, "grad_norm": 0.6564984321594238, "learning_rate": 5e-05, "loss": 0.229, "step": 829 }, { "epoch": 0.25915229099992193, "grad_norm": 0.6982418894767761, "learning_rate": 5e-05, "loss": 0.2307, "step": 830 }, { "epoch": 0.2594645226758255, "grad_norm": 0.6977365016937256, "learning_rate": 5e-05, "loss": 0.231, "step": 831 }, { "epoch": 0.259776754351729, "grad_norm": 0.6945233941078186, "learning_rate": 5e-05, "loss": 0.236, "step": 832 }, { "epoch": 0.26008898602763253, "grad_norm": 0.6545551419258118, "learning_rate": 5e-05, "loss": 0.2319, "step": 833 }, { "epoch": 0.260401217703536, "grad_norm": 0.6548911929130554, "learning_rate": 5e-05, "loss": 0.214, "step": 834 }, { "epoch": 0.2607134493794395, "grad_norm": 0.6569592356681824, "learning_rate": 5e-05, "loss": 0.2136, "step": 835 }, { "epoch": 0.2610256810553431, "grad_norm": 0.6746600270271301, "learning_rate": 5e-05, "loss": 0.2322, "step": 836 }, { "epoch": 0.26133791273124657, "grad_norm": 0.7235474586486816, "learning_rate": 5e-05, "loss": 0.2527, "step": 837 }, { "epoch": 0.2616501444071501, "grad_norm": 0.6870776414871216, "learning_rate": 5e-05, "loss": 0.2092, "step": 838 }, { "epoch": 0.2619623760830536, "grad_norm": 0.6914510130882263, "learning_rate": 5e-05, "loss": 0.2437, "step": 839 }, { "epoch": 0.26227460775895717, "grad_norm": 0.6694324612617493, "learning_rate": 5e-05, "loss": 0.248, "step": 840 }, { "epoch": 0.26258683943486066, "grad_norm": 0.6770434379577637, "learning_rate": 5e-05, "loss": 0.2252, "step": 841 }, { "epoch": 0.26289907111076416, "grad_norm": 0.6991760730743408, "learning_rate": 5e-05, "loss": 0.2309, "step": 842 }, { "epoch": 0.2632113027866677, "grad_norm": 0.6282133460044861, "learning_rate": 5e-05, "loss": 0.2232, "step": 843 }, { "epoch": 0.2635235344625712, "grad_norm": 0.6581993699073792, "learning_rate": 5e-05, "loss": 0.214, "step": 844 }, { "epoch": 0.26383576613847476, "grad_norm": 0.6565868258476257, "learning_rate": 5e-05, "loss": 0.2256, "step": 845 }, { "epoch": 0.26414799781437825, "grad_norm": 0.6438858509063721, "learning_rate": 5e-05, "loss": 0.235, "step": 846 }, { "epoch": 0.2644602294902818, "grad_norm": 0.6572927832603455, "learning_rate": 5e-05, "loss": 0.243, "step": 847 }, { "epoch": 0.2647724611661853, "grad_norm": 0.643601655960083, "learning_rate": 5e-05, "loss": 0.228, "step": 848 }, { "epoch": 0.26508469284208885, "grad_norm": 0.7074698805809021, "learning_rate": 5e-05, "loss": 0.2509, "step": 849 }, { "epoch": 0.26539692451799235, "grad_norm": 0.6729390621185303, "learning_rate": 5e-05, "loss": 0.251, "step": 850 }, { "epoch": 0.26570915619389585, "grad_norm": 0.6711787581443787, "learning_rate": 5e-05, "loss": 0.2328, "step": 851 }, { "epoch": 0.2660213878697994, "grad_norm": 0.6760109663009644, "learning_rate": 5e-05, "loss": 0.2322, "step": 852 }, { "epoch": 0.2663336195457029, "grad_norm": 0.6795610785484314, "learning_rate": 5e-05, "loss": 0.2231, "step": 853 }, { "epoch": 0.26664585122160644, "grad_norm": 0.6944295167922974, "learning_rate": 5e-05, "loss": 0.2289, "step": 854 }, { "epoch": 0.26695808289750994, "grad_norm": 0.6987950205802917, "learning_rate": 5e-05, "loss": 0.247, "step": 855 }, { "epoch": 0.2672703145734135, "grad_norm": 0.6865047216415405, "learning_rate": 5e-05, "loss": 0.2341, "step": 856 }, { "epoch": 0.267582546249317, "grad_norm": 0.6450919508934021, "learning_rate": 5e-05, "loss": 0.22, "step": 857 }, { "epoch": 0.26789477792522054, "grad_norm": 0.6834052205085754, "learning_rate": 5e-05, "loss": 0.2393, "step": 858 }, { "epoch": 0.26820700960112404, "grad_norm": 0.6807413697242737, "learning_rate": 5e-05, "loss": 0.228, "step": 859 }, { "epoch": 0.26851924127702753, "grad_norm": 0.6886799931526184, "learning_rate": 5e-05, "loss": 0.2427, "step": 860 }, { "epoch": 0.2688314729529311, "grad_norm": 0.635545551776886, "learning_rate": 5e-05, "loss": 0.2308, "step": 861 }, { "epoch": 0.2691437046288346, "grad_norm": 0.6802171468734741, "learning_rate": 5e-05, "loss": 0.2305, "step": 862 }, { "epoch": 0.26945593630473813, "grad_norm": 0.7168789505958557, "learning_rate": 5e-05, "loss": 0.2354, "step": 863 }, { "epoch": 0.2697681679806416, "grad_norm": 0.6907184720039368, "learning_rate": 5e-05, "loss": 0.2382, "step": 864 }, { "epoch": 0.2700803996565452, "grad_norm": 0.6295193433761597, "learning_rate": 5e-05, "loss": 0.2229, "step": 865 }, { "epoch": 0.2703926313324487, "grad_norm": 0.6649734377861023, "learning_rate": 5e-05, "loss": 0.2199, "step": 866 }, { "epoch": 0.27070486300835217, "grad_norm": 0.6462926268577576, "learning_rate": 5e-05, "loss": 0.2228, "step": 867 }, { "epoch": 0.2710170946842557, "grad_norm": 0.7503409385681152, "learning_rate": 5e-05, "loss": 0.2469, "step": 868 }, { "epoch": 0.2713293263601592, "grad_norm": 0.7014450430870056, "learning_rate": 5e-05, "loss": 0.2358, "step": 869 }, { "epoch": 0.27164155803606277, "grad_norm": 0.6451654434204102, "learning_rate": 5e-05, "loss": 0.2262, "step": 870 }, { "epoch": 0.27195378971196627, "grad_norm": 0.6707436442375183, "learning_rate": 5e-05, "loss": 0.2563, "step": 871 }, { "epoch": 0.2722660213878698, "grad_norm": 0.6582204699516296, "learning_rate": 5e-05, "loss": 0.2199, "step": 872 }, { "epoch": 0.2725782530637733, "grad_norm": 0.6277878880500793, "learning_rate": 5e-05, "loss": 0.2264, "step": 873 }, { "epoch": 0.27289048473967686, "grad_norm": 0.6574554443359375, "learning_rate": 5e-05, "loss": 0.2303, "step": 874 }, { "epoch": 0.27320271641558036, "grad_norm": 0.6395196318626404, "learning_rate": 5e-05, "loss": 0.2202, "step": 875 }, { "epoch": 0.27351494809148386, "grad_norm": 0.6417011618614197, "learning_rate": 5e-05, "loss": 0.2197, "step": 876 }, { "epoch": 0.2738271797673874, "grad_norm": 0.646888792514801, "learning_rate": 5e-05, "loss": 0.2245, "step": 877 }, { "epoch": 0.2741394114432909, "grad_norm": 0.6525022983551025, "learning_rate": 5e-05, "loss": 0.222, "step": 878 }, { "epoch": 0.27445164311919445, "grad_norm": 0.6694610714912415, "learning_rate": 5e-05, "loss": 0.2362, "step": 879 }, { "epoch": 0.27476387479509795, "grad_norm": 0.6637623906135559, "learning_rate": 5e-05, "loss": 0.2241, "step": 880 }, { "epoch": 0.2750761064710015, "grad_norm": 0.6937053799629211, "learning_rate": 5e-05, "loss": 0.2196, "step": 881 }, { "epoch": 0.275388338146905, "grad_norm": 0.7698585987091064, "learning_rate": 5e-05, "loss": 0.2517, "step": 882 }, { "epoch": 0.27570056982280855, "grad_norm": 0.6938981413841248, "learning_rate": 5e-05, "loss": 0.2483, "step": 883 }, { "epoch": 0.27601280149871205, "grad_norm": 0.6605616807937622, "learning_rate": 5e-05, "loss": 0.2264, "step": 884 }, { "epoch": 0.27632503317461554, "grad_norm": 0.6676762700080872, "learning_rate": 5e-05, "loss": 0.2149, "step": 885 }, { "epoch": 0.2766372648505191, "grad_norm": 0.644374430179596, "learning_rate": 5e-05, "loss": 0.2095, "step": 886 }, { "epoch": 0.2769494965264226, "grad_norm": 0.6585034132003784, "learning_rate": 5e-05, "loss": 0.2196, "step": 887 }, { "epoch": 0.27726172820232614, "grad_norm": 0.6624630689620972, "learning_rate": 5e-05, "loss": 0.228, "step": 888 }, { "epoch": 0.27757395987822964, "grad_norm": 0.6522522568702698, "learning_rate": 5e-05, "loss": 0.224, "step": 889 }, { "epoch": 0.2778861915541332, "grad_norm": 0.6456452012062073, "learning_rate": 5e-05, "loss": 0.2279, "step": 890 }, { "epoch": 0.2781984232300367, "grad_norm": 0.6825686693191528, "learning_rate": 5e-05, "loss": 0.2396, "step": 891 }, { "epoch": 0.2785106549059402, "grad_norm": 0.6561620831489563, "learning_rate": 5e-05, "loss": 0.2351, "step": 892 }, { "epoch": 0.27882288658184373, "grad_norm": 0.6667952537536621, "learning_rate": 5e-05, "loss": 0.2215, "step": 893 }, { "epoch": 0.27913511825774723, "grad_norm": 0.6314045786857605, "learning_rate": 5e-05, "loss": 0.2091, "step": 894 }, { "epoch": 0.2794473499336508, "grad_norm": 0.631007730960846, "learning_rate": 5e-05, "loss": 0.222, "step": 895 }, { "epoch": 0.2797595816095543, "grad_norm": 0.6783739924430847, "learning_rate": 5e-05, "loss": 0.2299, "step": 896 }, { "epoch": 0.2800718132854578, "grad_norm": 0.6412420868873596, "learning_rate": 5e-05, "loss": 0.2206, "step": 897 }, { "epoch": 0.2803840449613613, "grad_norm": 0.7038877010345459, "learning_rate": 5e-05, "loss": 0.235, "step": 898 }, { "epoch": 0.2806962766372649, "grad_norm": 0.6397419571876526, "learning_rate": 5e-05, "loss": 0.2187, "step": 899 }, { "epoch": 0.28100850831316837, "grad_norm": 0.6605945229530334, "learning_rate": 5e-05, "loss": 0.2165, "step": 900 }, { "epoch": 0.28132073998907187, "grad_norm": 0.6760973334312439, "learning_rate": 5e-05, "loss": 0.215, "step": 901 }, { "epoch": 0.2816329716649754, "grad_norm": 0.6838994026184082, "learning_rate": 5e-05, "loss": 0.2253, "step": 902 }, { "epoch": 0.2819452033408789, "grad_norm": 0.6481850743293762, "learning_rate": 5e-05, "loss": 0.2157, "step": 903 }, { "epoch": 0.28225743501678247, "grad_norm": 0.685012936592102, "learning_rate": 5e-05, "loss": 0.2059, "step": 904 }, { "epoch": 0.28256966669268596, "grad_norm": 0.6798831224441528, "learning_rate": 5e-05, "loss": 0.2236, "step": 905 }, { "epoch": 0.2828818983685895, "grad_norm": 0.6833891272544861, "learning_rate": 5e-05, "loss": 0.225, "step": 906 }, { "epoch": 0.283194130044493, "grad_norm": 0.6771740913391113, "learning_rate": 5e-05, "loss": 0.2291, "step": 907 }, { "epoch": 0.28350636172039656, "grad_norm": 0.6842947602272034, "learning_rate": 5e-05, "loss": 0.2387, "step": 908 }, { "epoch": 0.28381859339630006, "grad_norm": 0.644900381565094, "learning_rate": 5e-05, "loss": 0.2123, "step": 909 }, { "epoch": 0.28413082507220355, "grad_norm": 0.7056095004081726, "learning_rate": 5e-05, "loss": 0.2477, "step": 910 }, { "epoch": 0.2844430567481071, "grad_norm": 0.665339469909668, "learning_rate": 5e-05, "loss": 0.2375, "step": 911 }, { "epoch": 0.2847552884240106, "grad_norm": 0.6702404618263245, "learning_rate": 5e-05, "loss": 0.2189, "step": 912 }, { "epoch": 0.28506752009991415, "grad_norm": 0.6581178307533264, "learning_rate": 5e-05, "loss": 0.2166, "step": 913 }, { "epoch": 0.28537975177581765, "grad_norm": 0.6344070434570312, "learning_rate": 5e-05, "loss": 0.2309, "step": 914 }, { "epoch": 0.2856919834517212, "grad_norm": 0.6501642465591431, "learning_rate": 5e-05, "loss": 0.2345, "step": 915 }, { "epoch": 0.2860042151276247, "grad_norm": 0.6655405759811401, "learning_rate": 5e-05, "loss": 0.2245, "step": 916 }, { "epoch": 0.2863164468035282, "grad_norm": 0.6509348154067993, "learning_rate": 5e-05, "loss": 0.2191, "step": 917 }, { "epoch": 0.28662867847943174, "grad_norm": 0.7103174328804016, "learning_rate": 5e-05, "loss": 0.2278, "step": 918 }, { "epoch": 0.28694091015533524, "grad_norm": 0.6506911516189575, "learning_rate": 5e-05, "loss": 0.2272, "step": 919 }, { "epoch": 0.2872531418312388, "grad_norm": 0.6529115438461304, "learning_rate": 5e-05, "loss": 0.2328, "step": 920 }, { "epoch": 0.2875653735071423, "grad_norm": 0.6563238501548767, "learning_rate": 5e-05, "loss": 0.223, "step": 921 }, { "epoch": 0.28787760518304584, "grad_norm": 0.6426532864570618, "learning_rate": 5e-05, "loss": 0.2179, "step": 922 }, { "epoch": 0.28818983685894933, "grad_norm": 0.6501548886299133, "learning_rate": 5e-05, "loss": 0.2369, "step": 923 }, { "epoch": 0.2885020685348529, "grad_norm": 0.6909629106521606, "learning_rate": 5e-05, "loss": 0.233, "step": 924 }, { "epoch": 0.2888143002107564, "grad_norm": 0.6424227952957153, "learning_rate": 5e-05, "loss": 0.2249, "step": 925 }, { "epoch": 0.2891265318866599, "grad_norm": 0.6961096525192261, "learning_rate": 5e-05, "loss": 0.2369, "step": 926 }, { "epoch": 0.28943876356256343, "grad_norm": 0.6280301809310913, "learning_rate": 5e-05, "loss": 0.2186, "step": 927 }, { "epoch": 0.2897509952384669, "grad_norm": 0.6787440776824951, "learning_rate": 5e-05, "loss": 0.2178, "step": 928 }, { "epoch": 0.2900632269143705, "grad_norm": 0.6161020994186401, "learning_rate": 5e-05, "loss": 0.2287, "step": 929 }, { "epoch": 0.29037545859027397, "grad_norm": 0.7143908739089966, "learning_rate": 5e-05, "loss": 0.2371, "step": 930 }, { "epoch": 0.2906876902661775, "grad_norm": 0.7029746174812317, "learning_rate": 5e-05, "loss": 0.2382, "step": 931 }, { "epoch": 0.290999921942081, "grad_norm": 0.682037889957428, "learning_rate": 5e-05, "loss": 0.2218, "step": 932 }, { "epoch": 0.29131215361798457, "grad_norm": 0.6673375368118286, "learning_rate": 5e-05, "loss": 0.2207, "step": 933 }, { "epoch": 0.29162438529388807, "grad_norm": 0.7138528823852539, "learning_rate": 5e-05, "loss": 0.234, "step": 934 }, { "epoch": 0.29193661696979156, "grad_norm": 0.660064160823822, "learning_rate": 5e-05, "loss": 0.2189, "step": 935 }, { "epoch": 0.2922488486456951, "grad_norm": 0.6824430823326111, "learning_rate": 5e-05, "loss": 0.2399, "step": 936 }, { "epoch": 0.2925610803215986, "grad_norm": 0.6506645679473877, "learning_rate": 5e-05, "loss": 0.2167, "step": 937 }, { "epoch": 0.29287331199750216, "grad_norm": 0.6457629203796387, "learning_rate": 5e-05, "loss": 0.214, "step": 938 }, { "epoch": 0.29318554367340566, "grad_norm": 0.6527909636497498, "learning_rate": 5e-05, "loss": 0.2177, "step": 939 }, { "epoch": 0.2934977753493092, "grad_norm": 0.6384020447731018, "learning_rate": 5e-05, "loss": 0.2072, "step": 940 }, { "epoch": 0.2938100070252127, "grad_norm": 0.6266273856163025, "learning_rate": 5e-05, "loss": 0.2042, "step": 941 }, { "epoch": 0.2941222387011162, "grad_norm": 0.6439869403839111, "learning_rate": 5e-05, "loss": 0.2201, "step": 942 }, { "epoch": 0.29443447037701975, "grad_norm": 0.649206817150116, "learning_rate": 5e-05, "loss": 0.2123, "step": 943 }, { "epoch": 0.29474670205292325, "grad_norm": 0.6791210174560547, "learning_rate": 5e-05, "loss": 0.2027, "step": 944 }, { "epoch": 0.2950589337288268, "grad_norm": 0.7195387482643127, "learning_rate": 5e-05, "loss": 0.2444, "step": 945 }, { "epoch": 0.2953711654047303, "grad_norm": 0.7042203545570374, "learning_rate": 5e-05, "loss": 0.2339, "step": 946 }, { "epoch": 0.29568339708063385, "grad_norm": 0.6732985973358154, "learning_rate": 5e-05, "loss": 0.2358, "step": 947 }, { "epoch": 0.29599562875653734, "grad_norm": 0.6501320600509644, "learning_rate": 5e-05, "loss": 0.2292, "step": 948 }, { "epoch": 0.2963078604324409, "grad_norm": 0.6882880926132202, "learning_rate": 5e-05, "loss": 0.2311, "step": 949 }, { "epoch": 0.2966200921083444, "grad_norm": 0.7328774333000183, "learning_rate": 5e-05, "loss": 0.2491, "step": 950 }, { "epoch": 0.2969323237842479, "grad_norm": 0.638482928276062, "learning_rate": 5e-05, "loss": 0.2229, "step": 951 }, { "epoch": 0.29724455546015144, "grad_norm": 0.6121382713317871, "learning_rate": 5e-05, "loss": 0.2126, "step": 952 }, { "epoch": 0.29755678713605493, "grad_norm": 0.6461104154586792, "learning_rate": 5e-05, "loss": 0.2326, "step": 953 }, { "epoch": 0.2978690188119585, "grad_norm": 0.6100934743881226, "learning_rate": 5e-05, "loss": 0.2012, "step": 954 }, { "epoch": 0.298181250487862, "grad_norm": 0.669475257396698, "learning_rate": 5e-05, "loss": 0.2346, "step": 955 }, { "epoch": 0.29849348216376553, "grad_norm": 0.6572253704071045, "learning_rate": 5e-05, "loss": 0.2225, "step": 956 }, { "epoch": 0.29880571383966903, "grad_norm": 0.6559196710586548, "learning_rate": 5e-05, "loss": 0.2131, "step": 957 }, { "epoch": 0.2991179455155726, "grad_norm": 0.6885977387428284, "learning_rate": 5e-05, "loss": 0.2272, "step": 958 }, { "epoch": 0.2994301771914761, "grad_norm": 0.6522559523582458, "learning_rate": 5e-05, "loss": 0.2214, "step": 959 }, { "epoch": 0.2997424088673796, "grad_norm": 0.6439231038093567, "learning_rate": 5e-05, "loss": 0.225, "step": 960 }, { "epoch": 0.3000546405432831, "grad_norm": 0.6250431537628174, "learning_rate": 5e-05, "loss": 0.2109, "step": 961 }, { "epoch": 0.3003668722191866, "grad_norm": 0.6827552914619446, "learning_rate": 5e-05, "loss": 0.2424, "step": 962 }, { "epoch": 0.30067910389509017, "grad_norm": 0.670535683631897, "learning_rate": 5e-05, "loss": 0.2435, "step": 963 }, { "epoch": 0.30099133557099367, "grad_norm": 0.6815527677536011, "learning_rate": 5e-05, "loss": 0.2281, "step": 964 }, { "epoch": 0.3013035672468972, "grad_norm": 0.6591168642044067, "learning_rate": 5e-05, "loss": 0.2253, "step": 965 }, { "epoch": 0.3016157989228007, "grad_norm": 0.7042067050933838, "learning_rate": 5e-05, "loss": 0.2187, "step": 966 }, { "epoch": 0.3019280305987042, "grad_norm": 0.6670171618461609, "learning_rate": 5e-05, "loss": 0.2256, "step": 967 }, { "epoch": 0.30224026227460776, "grad_norm": 0.6426310539245605, "learning_rate": 5e-05, "loss": 0.2187, "step": 968 }, { "epoch": 0.30255249395051126, "grad_norm": 0.6459170579910278, "learning_rate": 5e-05, "loss": 0.2255, "step": 969 }, { "epoch": 0.3028647256264148, "grad_norm": 0.6066742539405823, "learning_rate": 5e-05, "loss": 0.216, "step": 970 }, { "epoch": 0.3031769573023183, "grad_norm": 0.6452570557594299, "learning_rate": 5e-05, "loss": 0.2173, "step": 971 }, { "epoch": 0.30348918897822186, "grad_norm": 0.633155882358551, "learning_rate": 5e-05, "loss": 0.2248, "step": 972 }, { "epoch": 0.30380142065412535, "grad_norm": 0.6343851685523987, "learning_rate": 5e-05, "loss": 0.2154, "step": 973 }, { "epoch": 0.3041136523300289, "grad_norm": 0.6807835102081299, "learning_rate": 5e-05, "loss": 0.2288, "step": 974 }, { "epoch": 0.3044258840059324, "grad_norm": 0.6434829235076904, "learning_rate": 5e-05, "loss": 0.2095, "step": 975 }, { "epoch": 0.3047381156818359, "grad_norm": 0.673392117023468, "learning_rate": 5e-05, "loss": 0.2249, "step": 976 }, { "epoch": 0.30505034735773945, "grad_norm": 0.6583318114280701, "learning_rate": 5e-05, "loss": 0.2094, "step": 977 }, { "epoch": 0.30536257903364294, "grad_norm": 0.6439551711082458, "learning_rate": 5e-05, "loss": 0.2234, "step": 978 }, { "epoch": 0.3056748107095465, "grad_norm": 0.6621553301811218, "learning_rate": 5e-05, "loss": 0.2216, "step": 979 }, { "epoch": 0.30598704238545, "grad_norm": 0.6370599269866943, "learning_rate": 5e-05, "loss": 0.2193, "step": 980 }, { "epoch": 0.30629927406135354, "grad_norm": 0.6871243119239807, "learning_rate": 5e-05, "loss": 0.2336, "step": 981 }, { "epoch": 0.30661150573725704, "grad_norm": 0.6252028346061707, "learning_rate": 5e-05, "loss": 0.2096, "step": 982 }, { "epoch": 0.3069237374131606, "grad_norm": 0.669984757900238, "learning_rate": 5e-05, "loss": 0.2233, "step": 983 }, { "epoch": 0.3072359690890641, "grad_norm": 0.6828444600105286, "learning_rate": 5e-05, "loss": 0.2351, "step": 984 }, { "epoch": 0.3075482007649676, "grad_norm": 0.6589028835296631, "learning_rate": 5e-05, "loss": 0.2271, "step": 985 }, { "epoch": 0.30786043244087113, "grad_norm": 0.6701045036315918, "learning_rate": 5e-05, "loss": 0.2224, "step": 986 }, { "epoch": 0.30817266411677463, "grad_norm": 0.6812865138053894, "learning_rate": 5e-05, "loss": 0.2398, "step": 987 }, { "epoch": 0.3084848957926782, "grad_norm": 0.643173098564148, "learning_rate": 5e-05, "loss": 0.2093, "step": 988 }, { "epoch": 0.3087971274685817, "grad_norm": 0.64588862657547, "learning_rate": 5e-05, "loss": 0.2279, "step": 989 }, { "epoch": 0.30910935914448523, "grad_norm": 0.6522712111473083, "learning_rate": 5e-05, "loss": 0.2253, "step": 990 }, { "epoch": 0.3094215908203887, "grad_norm": 0.6824530363082886, "learning_rate": 5e-05, "loss": 0.2218, "step": 991 }, { "epoch": 0.3097338224962922, "grad_norm": 0.6517541408538818, "learning_rate": 5e-05, "loss": 0.2182, "step": 992 }, { "epoch": 0.3100460541721958, "grad_norm": 0.6642028093338013, "learning_rate": 5e-05, "loss": 0.2298, "step": 993 }, { "epoch": 0.31035828584809927, "grad_norm": 0.6538026332855225, "learning_rate": 5e-05, "loss": 0.2498, "step": 994 }, { "epoch": 0.3106705175240028, "grad_norm": 0.686779797077179, "learning_rate": 5e-05, "loss": 0.2547, "step": 995 }, { "epoch": 0.3109827491999063, "grad_norm": 0.6427977681159973, "learning_rate": 5e-05, "loss": 0.2274, "step": 996 }, { "epoch": 0.31129498087580987, "grad_norm": 0.6584010124206543, "learning_rate": 5e-05, "loss": 0.2257, "step": 997 }, { "epoch": 0.31160721255171336, "grad_norm": 0.6650153398513794, "learning_rate": 5e-05, "loss": 0.2417, "step": 998 }, { "epoch": 0.3119194442276169, "grad_norm": 0.6464514136314392, "learning_rate": 5e-05, "loss": 0.2186, "step": 999 }, { "epoch": 0.3122316759035204, "grad_norm": 0.646162748336792, "learning_rate": 5e-05, "loss": 0.23, "step": 1000 }, { "epoch": 0.3125439075794239, "grad_norm": 0.6822746396064758, "learning_rate": 5e-05, "loss": 0.2344, "step": 1001 }, { "epoch": 0.31285613925532746, "grad_norm": 0.6632934212684631, "learning_rate": 5e-05, "loss": 0.2212, "step": 1002 }, { "epoch": 0.31316837093123095, "grad_norm": 0.6737279295921326, "learning_rate": 5e-05, "loss": 0.2322, "step": 1003 }, { "epoch": 0.3134806026071345, "grad_norm": 0.6884944438934326, "learning_rate": 5e-05, "loss": 0.2371, "step": 1004 }, { "epoch": 0.313792834283038, "grad_norm": 0.6568999290466309, "learning_rate": 5e-05, "loss": 0.2432, "step": 1005 }, { "epoch": 0.31410506595894155, "grad_norm": 0.7123122215270996, "learning_rate": 5e-05, "loss": 0.2173, "step": 1006 }, { "epoch": 0.31441729763484505, "grad_norm": 0.6535437107086182, "learning_rate": 5e-05, "loss": 0.2169, "step": 1007 }, { "epoch": 0.3147295293107486, "grad_norm": 0.717993438243866, "learning_rate": 5e-05, "loss": 0.2301, "step": 1008 }, { "epoch": 0.3150417609866521, "grad_norm": 0.6668649315834045, "learning_rate": 5e-05, "loss": 0.2448, "step": 1009 }, { "epoch": 0.3153539926625556, "grad_norm": 0.6443164944648743, "learning_rate": 5e-05, "loss": 0.2181, "step": 1010 }, { "epoch": 0.31566622433845914, "grad_norm": 0.6940616369247437, "learning_rate": 5e-05, "loss": 0.2383, "step": 1011 }, { "epoch": 0.31597845601436264, "grad_norm": 0.6230196356773376, "learning_rate": 5e-05, "loss": 0.2069, "step": 1012 }, { "epoch": 0.3162906876902662, "grad_norm": 0.6467565894126892, "learning_rate": 5e-05, "loss": 0.2238, "step": 1013 }, { "epoch": 0.3166029193661697, "grad_norm": 0.6584547758102417, "learning_rate": 5e-05, "loss": 0.2183, "step": 1014 }, { "epoch": 0.31691515104207324, "grad_norm": 0.6688600778579712, "learning_rate": 5e-05, "loss": 0.2249, "step": 1015 }, { "epoch": 0.31722738271797674, "grad_norm": 0.7189931273460388, "learning_rate": 5e-05, "loss": 0.2335, "step": 1016 }, { "epoch": 0.31753961439388023, "grad_norm": 0.6805618405342102, "learning_rate": 5e-05, "loss": 0.2428, "step": 1017 }, { "epoch": 0.3178518460697838, "grad_norm": 0.664412796497345, "learning_rate": 5e-05, "loss": 0.2239, "step": 1018 }, { "epoch": 0.3181640777456873, "grad_norm": 0.7259537577629089, "learning_rate": 5e-05, "loss": 0.2486, "step": 1019 }, { "epoch": 0.31847630942159083, "grad_norm": 0.6365126967430115, "learning_rate": 5e-05, "loss": 0.2116, "step": 1020 }, { "epoch": 0.3187885410974943, "grad_norm": 0.656096875667572, "learning_rate": 5e-05, "loss": 0.2238, "step": 1021 }, { "epoch": 0.3191007727733979, "grad_norm": 0.7076267600059509, "learning_rate": 5e-05, "loss": 0.2292, "step": 1022 }, { "epoch": 0.3194130044493014, "grad_norm": 0.6577603816986084, "learning_rate": 5e-05, "loss": 0.224, "step": 1023 }, { "epoch": 0.3197252361252049, "grad_norm": 0.6622909903526306, "learning_rate": 5e-05, "loss": 0.2301, "step": 1024 }, { "epoch": 0.3200374678011084, "grad_norm": 0.633216917514801, "learning_rate": 5e-05, "loss": 0.2251, "step": 1025 }, { "epoch": 0.3203496994770119, "grad_norm": 0.6588516235351562, "learning_rate": 5e-05, "loss": 0.2197, "step": 1026 }, { "epoch": 0.32066193115291547, "grad_norm": 0.6427306532859802, "learning_rate": 5e-05, "loss": 0.2238, "step": 1027 }, { "epoch": 0.32097416282881897, "grad_norm": 0.6513209939002991, "learning_rate": 5e-05, "loss": 0.2318, "step": 1028 }, { "epoch": 0.3212863945047225, "grad_norm": 0.6540398001670837, "learning_rate": 5e-05, "loss": 0.2351, "step": 1029 }, { "epoch": 0.321598626180626, "grad_norm": 0.64468914270401, "learning_rate": 5e-05, "loss": 0.2126, "step": 1030 }, { "epoch": 0.32191085785652956, "grad_norm": 0.6348252296447754, "learning_rate": 5e-05, "loss": 0.2156, "step": 1031 }, { "epoch": 0.32222308953243306, "grad_norm": 0.6588594317436218, "learning_rate": 5e-05, "loss": 0.2285, "step": 1032 }, { "epoch": 0.3225353212083366, "grad_norm": 0.6859313249588013, "learning_rate": 5e-05, "loss": 0.2412, "step": 1033 }, { "epoch": 0.3228475528842401, "grad_norm": 0.6670272350311279, "learning_rate": 5e-05, "loss": 0.244, "step": 1034 }, { "epoch": 0.3231597845601436, "grad_norm": 0.6182592511177063, "learning_rate": 5e-05, "loss": 0.2147, "step": 1035 }, { "epoch": 0.32347201623604716, "grad_norm": 0.6454702615737915, "learning_rate": 5e-05, "loss": 0.2114, "step": 1036 }, { "epoch": 0.32378424791195065, "grad_norm": 0.6802915930747986, "learning_rate": 5e-05, "loss": 0.2322, "step": 1037 }, { "epoch": 0.3240964795878542, "grad_norm": 0.6021156311035156, "learning_rate": 5e-05, "loss": 0.2027, "step": 1038 }, { "epoch": 0.3244087112637577, "grad_norm": 0.6269187927246094, "learning_rate": 5e-05, "loss": 0.2306, "step": 1039 }, { "epoch": 0.32472094293966125, "grad_norm": 0.6172708868980408, "learning_rate": 5e-05, "loss": 0.2223, "step": 1040 }, { "epoch": 0.32503317461556475, "grad_norm": 0.5982217788696289, "learning_rate": 5e-05, "loss": 0.1986, "step": 1041 }, { "epoch": 0.32534540629146824, "grad_norm": 0.6584370732307434, "learning_rate": 5e-05, "loss": 0.2354, "step": 1042 }, { "epoch": 0.3256576379673718, "grad_norm": 0.6829003095626831, "learning_rate": 5e-05, "loss": 0.2362, "step": 1043 }, { "epoch": 0.3259698696432753, "grad_norm": 0.6600732207298279, "learning_rate": 5e-05, "loss": 0.2371, "step": 1044 }, { "epoch": 0.32628210131917884, "grad_norm": 0.6914985775947571, "learning_rate": 5e-05, "loss": 0.2444, "step": 1045 }, { "epoch": 0.32659433299508234, "grad_norm": 0.6551594734191895, "learning_rate": 5e-05, "loss": 0.2312, "step": 1046 }, { "epoch": 0.3269065646709859, "grad_norm": 0.6793872117996216, "learning_rate": 5e-05, "loss": 0.2406, "step": 1047 }, { "epoch": 0.3272187963468894, "grad_norm": 0.6497758030891418, "learning_rate": 5e-05, "loss": 0.2182, "step": 1048 }, { "epoch": 0.32753102802279294, "grad_norm": 0.6256152987480164, "learning_rate": 5e-05, "loss": 0.2066, "step": 1049 }, { "epoch": 0.32784325969869643, "grad_norm": 0.6643775105476379, "learning_rate": 5e-05, "loss": 0.2209, "step": 1050 }, { "epoch": 0.32815549137459993, "grad_norm": 0.6317597031593323, "learning_rate": 5e-05, "loss": 0.2318, "step": 1051 }, { "epoch": 0.3284677230505035, "grad_norm": 0.6639249324798584, "learning_rate": 5e-05, "loss": 0.2242, "step": 1052 }, { "epoch": 0.328779954726407, "grad_norm": 0.678693950176239, "learning_rate": 5e-05, "loss": 0.2348, "step": 1053 }, { "epoch": 0.3290921864023105, "grad_norm": 0.6573380827903748, "learning_rate": 5e-05, "loss": 0.2333, "step": 1054 }, { "epoch": 0.329404418078214, "grad_norm": 0.6380922794342041, "learning_rate": 5e-05, "loss": 0.2171, "step": 1055 }, { "epoch": 0.3297166497541176, "grad_norm": 0.6172462105751038, "learning_rate": 5e-05, "loss": 0.2246, "step": 1056 }, { "epoch": 0.33002888143002107, "grad_norm": 0.671401858329773, "learning_rate": 5e-05, "loss": 0.2291, "step": 1057 }, { "epoch": 0.3303411131059246, "grad_norm": 0.6157158017158508, "learning_rate": 5e-05, "loss": 0.2132, "step": 1058 }, { "epoch": 0.3306533447818281, "grad_norm": 0.6883261799812317, "learning_rate": 5e-05, "loss": 0.2325, "step": 1059 }, { "epoch": 0.3309655764577316, "grad_norm": 0.6727818846702576, "learning_rate": 5e-05, "loss": 0.2262, "step": 1060 }, { "epoch": 0.33127780813363517, "grad_norm": 0.6693524122238159, "learning_rate": 5e-05, "loss": 0.2177, "step": 1061 }, { "epoch": 0.33159003980953866, "grad_norm": 0.6557754874229431, "learning_rate": 5e-05, "loss": 0.2274, "step": 1062 }, { "epoch": 0.3319022714854422, "grad_norm": 0.6462004780769348, "learning_rate": 5e-05, "loss": 0.2314, "step": 1063 }, { "epoch": 0.3322145031613457, "grad_norm": 0.6578602194786072, "learning_rate": 5e-05, "loss": 0.2312, "step": 1064 }, { "epoch": 0.33252673483724926, "grad_norm": 0.701331615447998, "learning_rate": 5e-05, "loss": 0.2387, "step": 1065 }, { "epoch": 0.33283896651315276, "grad_norm": 0.6908705234527588, "learning_rate": 5e-05, "loss": 0.2351, "step": 1066 }, { "epoch": 0.33315119818905625, "grad_norm": 0.6261702179908752, "learning_rate": 5e-05, "loss": 0.2248, "step": 1067 }, { "epoch": 0.3334634298649598, "grad_norm": 0.6824831962585449, "learning_rate": 5e-05, "loss": 0.2498, "step": 1068 }, { "epoch": 0.3337756615408633, "grad_norm": 0.663324236869812, "learning_rate": 5e-05, "loss": 0.2244, "step": 1069 }, { "epoch": 0.33408789321676685, "grad_norm": 0.6329442262649536, "learning_rate": 5e-05, "loss": 0.204, "step": 1070 }, { "epoch": 0.33440012489267035, "grad_norm": 0.6372144222259521, "learning_rate": 5e-05, "loss": 0.228, "step": 1071 }, { "epoch": 0.3347123565685739, "grad_norm": 0.6833580136299133, "learning_rate": 5e-05, "loss": 0.2496, "step": 1072 }, { "epoch": 0.3350245882444774, "grad_norm": 0.623944103717804, "learning_rate": 5e-05, "loss": 0.2305, "step": 1073 }, { "epoch": 0.33533681992038095, "grad_norm": 0.6889824867248535, "learning_rate": 5e-05, "loss": 0.2314, "step": 1074 }, { "epoch": 0.33564905159628444, "grad_norm": 0.6677700281143188, "learning_rate": 5e-05, "loss": 0.2245, "step": 1075 }, { "epoch": 0.33596128327218794, "grad_norm": 0.6135799884796143, "learning_rate": 5e-05, "loss": 0.2152, "step": 1076 }, { "epoch": 0.3362735149480915, "grad_norm": 0.6233729124069214, "learning_rate": 5e-05, "loss": 0.2193, "step": 1077 }, { "epoch": 0.336585746623995, "grad_norm": 0.6323064565658569, "learning_rate": 5e-05, "loss": 0.2049, "step": 1078 }, { "epoch": 0.33689797829989854, "grad_norm": 0.6220356225967407, "learning_rate": 5e-05, "loss": 0.2193, "step": 1079 }, { "epoch": 0.33721020997580203, "grad_norm": 0.6229811310768127, "learning_rate": 5e-05, "loss": 0.2165, "step": 1080 }, { "epoch": 0.3375224416517056, "grad_norm": 0.6495068669319153, "learning_rate": 5e-05, "loss": 0.228, "step": 1081 }, { "epoch": 0.3378346733276091, "grad_norm": 0.5990424156188965, "learning_rate": 5e-05, "loss": 0.2116, "step": 1082 }, { "epoch": 0.33814690500351263, "grad_norm": 0.6789101362228394, "learning_rate": 5e-05, "loss": 0.238, "step": 1083 }, { "epoch": 0.33845913667941613, "grad_norm": 0.6444856524467468, "learning_rate": 5e-05, "loss": 0.2097, "step": 1084 }, { "epoch": 0.3387713683553196, "grad_norm": 0.6272294521331787, "learning_rate": 5e-05, "loss": 0.2169, "step": 1085 }, { "epoch": 0.3390836000312232, "grad_norm": 0.6532320380210876, "learning_rate": 5e-05, "loss": 0.2274, "step": 1086 }, { "epoch": 0.33939583170712667, "grad_norm": 0.6811516880989075, "learning_rate": 5e-05, "loss": 0.2267, "step": 1087 }, { "epoch": 0.3397080633830302, "grad_norm": 0.724330723285675, "learning_rate": 5e-05, "loss": 0.2572, "step": 1088 }, { "epoch": 0.3400202950589337, "grad_norm": 0.6884994506835938, "learning_rate": 5e-05, "loss": 0.239, "step": 1089 }, { "epoch": 0.34033252673483727, "grad_norm": 0.6999273300170898, "learning_rate": 5e-05, "loss": 0.2253, "step": 1090 }, { "epoch": 0.34064475841074077, "grad_norm": 0.6613145470619202, "learning_rate": 5e-05, "loss": 0.2182, "step": 1091 }, { "epoch": 0.34095699008664426, "grad_norm": 0.6428823471069336, "learning_rate": 5e-05, "loss": 0.2255, "step": 1092 }, { "epoch": 0.3412692217625478, "grad_norm": 0.6048131585121155, "learning_rate": 5e-05, "loss": 0.2146, "step": 1093 }, { "epoch": 0.3415814534384513, "grad_norm": 0.6318319439888, "learning_rate": 5e-05, "loss": 0.2285, "step": 1094 }, { "epoch": 0.34189368511435486, "grad_norm": 0.6563419699668884, "learning_rate": 5e-05, "loss": 0.2087, "step": 1095 }, { "epoch": 0.34220591679025836, "grad_norm": 0.6334874033927917, "learning_rate": 5e-05, "loss": 0.2248, "step": 1096 }, { "epoch": 0.3425181484661619, "grad_norm": 0.6644085645675659, "learning_rate": 5e-05, "loss": 0.2301, "step": 1097 }, { "epoch": 0.3428303801420654, "grad_norm": 0.6336389780044556, "learning_rate": 5e-05, "loss": 0.2101, "step": 1098 }, { "epoch": 0.34314261181796896, "grad_norm": 0.6566491723060608, "learning_rate": 5e-05, "loss": 0.2345, "step": 1099 }, { "epoch": 0.34345484349387245, "grad_norm": 0.6474869251251221, "learning_rate": 5e-05, "loss": 0.2343, "step": 1100 }, { "epoch": 0.34376707516977595, "grad_norm": 0.6375615000724792, "learning_rate": 5e-05, "loss": 0.2259, "step": 1101 }, { "epoch": 0.3440793068456795, "grad_norm": 0.7052745819091797, "learning_rate": 5e-05, "loss": 0.2401, "step": 1102 }, { "epoch": 0.344391538521583, "grad_norm": 0.7055819630622864, "learning_rate": 5e-05, "loss": 0.223, "step": 1103 }, { "epoch": 0.34470377019748655, "grad_norm": 0.6631483435630798, "learning_rate": 5e-05, "loss": 0.2171, "step": 1104 }, { "epoch": 0.34501600187339004, "grad_norm": 0.7331700325012207, "learning_rate": 5e-05, "loss": 0.268, "step": 1105 }, { "epoch": 0.3453282335492936, "grad_norm": 0.6451469659805298, "learning_rate": 5e-05, "loss": 0.2309, "step": 1106 }, { "epoch": 0.3456404652251971, "grad_norm": 0.6758521795272827, "learning_rate": 5e-05, "loss": 0.2269, "step": 1107 }, { "epoch": 0.34595269690110064, "grad_norm": 0.6892019510269165, "learning_rate": 5e-05, "loss": 0.2365, "step": 1108 }, { "epoch": 0.34626492857700414, "grad_norm": 0.6396409869194031, "learning_rate": 5e-05, "loss": 0.2152, "step": 1109 }, { "epoch": 0.34657716025290763, "grad_norm": 0.6796300411224365, "learning_rate": 5e-05, "loss": 0.2324, "step": 1110 }, { "epoch": 0.3468893919288112, "grad_norm": 0.6637645363807678, "learning_rate": 5e-05, "loss": 0.2309, "step": 1111 }, { "epoch": 0.3472016236047147, "grad_norm": 0.6751366257667542, "learning_rate": 5e-05, "loss": 0.2344, "step": 1112 }, { "epoch": 0.34751385528061823, "grad_norm": 0.6542922854423523, "learning_rate": 5e-05, "loss": 0.2149, "step": 1113 }, { "epoch": 0.34782608695652173, "grad_norm": 0.6631845235824585, "learning_rate": 5e-05, "loss": 0.2245, "step": 1114 }, { "epoch": 0.3481383186324253, "grad_norm": 0.676128089427948, "learning_rate": 5e-05, "loss": 0.2503, "step": 1115 }, { "epoch": 0.3484505503083288, "grad_norm": 0.6942310333251953, "learning_rate": 5e-05, "loss": 0.246, "step": 1116 }, { "epoch": 0.3487627819842323, "grad_norm": 0.5789714455604553, "learning_rate": 5e-05, "loss": 0.1992, "step": 1117 }, { "epoch": 0.3490750136601358, "grad_norm": 0.6478298902511597, "learning_rate": 5e-05, "loss": 0.2428, "step": 1118 }, { "epoch": 0.3493872453360393, "grad_norm": 0.6723396182060242, "learning_rate": 5e-05, "loss": 0.2283, "step": 1119 }, { "epoch": 0.34969947701194287, "grad_norm": 0.6962315440177917, "learning_rate": 5e-05, "loss": 0.255, "step": 1120 }, { "epoch": 0.35001170868784637, "grad_norm": 0.6274025440216064, "learning_rate": 5e-05, "loss": 0.2065, "step": 1121 }, { "epoch": 0.3503239403637499, "grad_norm": 0.659756064414978, "learning_rate": 5e-05, "loss": 0.2278, "step": 1122 }, { "epoch": 0.3506361720396534, "grad_norm": 0.655425488948822, "learning_rate": 5e-05, "loss": 0.2094, "step": 1123 }, { "epoch": 0.35094840371555697, "grad_norm": 0.6166678071022034, "learning_rate": 5e-05, "loss": 0.2047, "step": 1124 }, { "epoch": 0.35126063539146046, "grad_norm": 0.6851571798324585, "learning_rate": 5e-05, "loss": 0.2263, "step": 1125 }, { "epoch": 0.35157286706736396, "grad_norm": 0.6182982921600342, "learning_rate": 5e-05, "loss": 0.1969, "step": 1126 }, { "epoch": 0.3518850987432675, "grad_norm": 0.6285266280174255, "learning_rate": 5e-05, "loss": 0.2218, "step": 1127 }, { "epoch": 0.352197330419171, "grad_norm": 0.6169029474258423, "learning_rate": 5e-05, "loss": 0.209, "step": 1128 }, { "epoch": 0.35250956209507456, "grad_norm": 0.6310932040214539, "learning_rate": 5e-05, "loss": 0.2271, "step": 1129 }, { "epoch": 0.35282179377097805, "grad_norm": 0.5948508977890015, "learning_rate": 5e-05, "loss": 0.2124, "step": 1130 }, { "epoch": 0.3531340254468816, "grad_norm": 0.6875050663948059, "learning_rate": 5e-05, "loss": 0.2298, "step": 1131 }, { "epoch": 0.3534462571227851, "grad_norm": 0.6581018567085266, "learning_rate": 5e-05, "loss": 0.2258, "step": 1132 }, { "epoch": 0.35375848879868865, "grad_norm": 0.6287102103233337, "learning_rate": 5e-05, "loss": 0.2215, "step": 1133 }, { "epoch": 0.35407072047459215, "grad_norm": 0.5988027453422546, "learning_rate": 5e-05, "loss": 0.2069, "step": 1134 }, { "epoch": 0.35438295215049564, "grad_norm": 0.6318877935409546, "learning_rate": 5e-05, "loss": 0.2074, "step": 1135 }, { "epoch": 0.3546951838263992, "grad_norm": 0.6325374245643616, "learning_rate": 5e-05, "loss": 0.2179, "step": 1136 }, { "epoch": 0.3550074155023027, "grad_norm": 0.6628188490867615, "learning_rate": 5e-05, "loss": 0.2269, "step": 1137 }, { "epoch": 0.35531964717820624, "grad_norm": 0.6402429938316345, "learning_rate": 5e-05, "loss": 0.2281, "step": 1138 }, { "epoch": 0.35563187885410974, "grad_norm": 0.6677586436271667, "learning_rate": 5e-05, "loss": 0.2245, "step": 1139 }, { "epoch": 0.3559441105300133, "grad_norm": 0.66090327501297, "learning_rate": 5e-05, "loss": 0.235, "step": 1140 }, { "epoch": 0.3562563422059168, "grad_norm": 0.7713232040405273, "learning_rate": 5e-05, "loss": 0.2193, "step": 1141 }, { "epoch": 0.3565685738818203, "grad_norm": 0.6458501815795898, "learning_rate": 5e-05, "loss": 0.2219, "step": 1142 }, { "epoch": 0.35688080555772383, "grad_norm": 0.6814406514167786, "learning_rate": 5e-05, "loss": 0.2308, "step": 1143 }, { "epoch": 0.35719303723362733, "grad_norm": 0.6555324792861938, "learning_rate": 5e-05, "loss": 0.219, "step": 1144 }, { "epoch": 0.3575052689095309, "grad_norm": 0.6982809901237488, "learning_rate": 5e-05, "loss": 0.2213, "step": 1145 }, { "epoch": 0.3578175005854344, "grad_norm": 0.6628324389457703, "learning_rate": 5e-05, "loss": 0.2211, "step": 1146 }, { "epoch": 0.35812973226133793, "grad_norm": 0.7035375237464905, "learning_rate": 5e-05, "loss": 0.2421, "step": 1147 }, { "epoch": 0.3584419639372414, "grad_norm": 0.6219911575317383, "learning_rate": 5e-05, "loss": 0.2255, "step": 1148 }, { "epoch": 0.358754195613145, "grad_norm": 0.650831401348114, "learning_rate": 5e-05, "loss": 0.2124, "step": 1149 }, { "epoch": 0.3590664272890485, "grad_norm": 0.6118208169937134, "learning_rate": 5e-05, "loss": 0.2076, "step": 1150 }, { "epoch": 0.35937865896495197, "grad_norm": 0.63581383228302, "learning_rate": 5e-05, "loss": 0.221, "step": 1151 }, { "epoch": 0.3596908906408555, "grad_norm": 0.6658530235290527, "learning_rate": 5e-05, "loss": 0.2372, "step": 1152 }, { "epoch": 0.360003122316759, "grad_norm": 0.6596976518630981, "learning_rate": 5e-05, "loss": 0.2259, "step": 1153 }, { "epoch": 0.36031535399266257, "grad_norm": 0.6291460394859314, "learning_rate": 5e-05, "loss": 0.2155, "step": 1154 }, { "epoch": 0.36062758566856606, "grad_norm": 0.7241662740707397, "learning_rate": 5e-05, "loss": 0.2372, "step": 1155 }, { "epoch": 0.3609398173444696, "grad_norm": 0.597844660282135, "learning_rate": 5e-05, "loss": 0.2085, "step": 1156 }, { "epoch": 0.3612520490203731, "grad_norm": 0.6248776316642761, "learning_rate": 5e-05, "loss": 0.2131, "step": 1157 }, { "epoch": 0.36156428069627666, "grad_norm": 0.6330453157424927, "learning_rate": 5e-05, "loss": 0.2139, "step": 1158 }, { "epoch": 0.36187651237218016, "grad_norm": 0.63691645860672, "learning_rate": 5e-05, "loss": 0.2223, "step": 1159 }, { "epoch": 0.36218874404808366, "grad_norm": 0.6492080688476562, "learning_rate": 5e-05, "loss": 0.2225, "step": 1160 }, { "epoch": 0.3625009757239872, "grad_norm": 0.6657512784004211, "learning_rate": 5e-05, "loss": 0.2255, "step": 1161 }, { "epoch": 0.3628132073998907, "grad_norm": 0.6312220692634583, "learning_rate": 5e-05, "loss": 0.2079, "step": 1162 }, { "epoch": 0.36312543907579425, "grad_norm": 0.6007552146911621, "learning_rate": 5e-05, "loss": 0.2107, "step": 1163 }, { "epoch": 0.36343767075169775, "grad_norm": 0.6763321757316589, "learning_rate": 5e-05, "loss": 0.2439, "step": 1164 }, { "epoch": 0.3637499024276013, "grad_norm": 0.6338094472885132, "learning_rate": 5e-05, "loss": 0.2072, "step": 1165 }, { "epoch": 0.3640621341035048, "grad_norm": 0.6397779583930969, "learning_rate": 5e-05, "loss": 0.2293, "step": 1166 }, { "epoch": 0.3643743657794083, "grad_norm": 0.6897631287574768, "learning_rate": 5e-05, "loss": 0.2256, "step": 1167 }, { "epoch": 0.36468659745531185, "grad_norm": 0.654867947101593, "learning_rate": 5e-05, "loss": 0.2122, "step": 1168 }, { "epoch": 0.36499882913121534, "grad_norm": 0.6529620289802551, "learning_rate": 5e-05, "loss": 0.2299, "step": 1169 }, { "epoch": 0.3653110608071189, "grad_norm": 0.7218067646026611, "learning_rate": 5e-05, "loss": 0.2322, "step": 1170 }, { "epoch": 0.3656232924830224, "grad_norm": 0.6397863626480103, "learning_rate": 5e-05, "loss": 0.2165, "step": 1171 }, { "epoch": 0.36593552415892594, "grad_norm": 0.6884912848472595, "learning_rate": 5e-05, "loss": 0.2421, "step": 1172 }, { "epoch": 0.36624775583482944, "grad_norm": 0.6436288356781006, "learning_rate": 5e-05, "loss": 0.2121, "step": 1173 }, { "epoch": 0.366559987510733, "grad_norm": 0.6826706528663635, "learning_rate": 5e-05, "loss": 0.241, "step": 1174 }, { "epoch": 0.3668722191866365, "grad_norm": 0.628265917301178, "learning_rate": 5e-05, "loss": 0.216, "step": 1175 }, { "epoch": 0.36718445086254, "grad_norm": 0.6698101162910461, "learning_rate": 5e-05, "loss": 0.2283, "step": 1176 }, { "epoch": 0.36749668253844353, "grad_norm": 0.6484993696212769, "learning_rate": 5e-05, "loss": 0.2464, "step": 1177 }, { "epoch": 0.367808914214347, "grad_norm": 0.6139289736747742, "learning_rate": 5e-05, "loss": 0.1952, "step": 1178 }, { "epoch": 0.3681211458902506, "grad_norm": 0.6424960494041443, "learning_rate": 5e-05, "loss": 0.2038, "step": 1179 }, { "epoch": 0.3684333775661541, "grad_norm": 0.5943055152893066, "learning_rate": 5e-05, "loss": 0.2099, "step": 1180 }, { "epoch": 0.3687456092420576, "grad_norm": 0.6718709468841553, "learning_rate": 5e-05, "loss": 0.2577, "step": 1181 }, { "epoch": 0.3690578409179611, "grad_norm": 0.6143738031387329, "learning_rate": 5e-05, "loss": 0.2122, "step": 1182 }, { "epoch": 0.3693700725938647, "grad_norm": 0.621602475643158, "learning_rate": 5e-05, "loss": 0.2076, "step": 1183 }, { "epoch": 0.36968230426976817, "grad_norm": 0.6573377251625061, "learning_rate": 5e-05, "loss": 0.2399, "step": 1184 }, { "epoch": 0.36999453594567167, "grad_norm": 0.6469151973724365, "learning_rate": 5e-05, "loss": 0.2216, "step": 1185 }, { "epoch": 0.3703067676215752, "grad_norm": 0.594005286693573, "learning_rate": 5e-05, "loss": 0.2111, "step": 1186 }, { "epoch": 0.3706189992974787, "grad_norm": 0.6483307480812073, "learning_rate": 5e-05, "loss": 0.2212, "step": 1187 }, { "epoch": 0.37093123097338226, "grad_norm": 0.672209620475769, "learning_rate": 5e-05, "loss": 0.2242, "step": 1188 }, { "epoch": 0.37124346264928576, "grad_norm": 0.6279879808425903, "learning_rate": 5e-05, "loss": 0.2159, "step": 1189 }, { "epoch": 0.3715556943251893, "grad_norm": 0.6273097395896912, "learning_rate": 5e-05, "loss": 0.2123, "step": 1190 }, { "epoch": 0.3718679260010928, "grad_norm": 0.6061847805976868, "learning_rate": 5e-05, "loss": 0.2083, "step": 1191 }, { "epoch": 0.3721801576769963, "grad_norm": 0.6302304267883301, "learning_rate": 5e-05, "loss": 0.213, "step": 1192 }, { "epoch": 0.37249238935289986, "grad_norm": 0.6610916256904602, "learning_rate": 5e-05, "loss": 0.226, "step": 1193 }, { "epoch": 0.37280462102880335, "grad_norm": 0.6414832472801208, "learning_rate": 5e-05, "loss": 0.2316, "step": 1194 }, { "epoch": 0.3731168527047069, "grad_norm": 0.6166239380836487, "learning_rate": 5e-05, "loss": 0.2152, "step": 1195 }, { "epoch": 0.3734290843806104, "grad_norm": 0.6228855848312378, "learning_rate": 5e-05, "loss": 0.2313, "step": 1196 }, { "epoch": 0.37374131605651395, "grad_norm": 0.6361050605773926, "learning_rate": 5e-05, "loss": 0.2223, "step": 1197 }, { "epoch": 0.37405354773241745, "grad_norm": 0.6587837934494019, "learning_rate": 5e-05, "loss": 0.2217, "step": 1198 }, { "epoch": 0.374365779408321, "grad_norm": 0.621182918548584, "learning_rate": 5e-05, "loss": 0.2077, "step": 1199 }, { "epoch": 0.3746780110842245, "grad_norm": 0.612017035484314, "learning_rate": 5e-05, "loss": 0.2285, "step": 1200 }, { "epoch": 0.374990242760128, "grad_norm": 0.69352126121521, "learning_rate": 5e-05, "loss": 0.2266, "step": 1201 }, { "epoch": 0.37530247443603154, "grad_norm": 0.6495543718338013, "learning_rate": 5e-05, "loss": 0.2105, "step": 1202 }, { "epoch": 0.37561470611193504, "grad_norm": 0.6553254723548889, "learning_rate": 5e-05, "loss": 0.2181, "step": 1203 }, { "epoch": 0.3759269377878386, "grad_norm": 0.6451044082641602, "learning_rate": 5e-05, "loss": 0.2154, "step": 1204 }, { "epoch": 0.3762391694637421, "grad_norm": 0.6493386626243591, "learning_rate": 5e-05, "loss": 0.2246, "step": 1205 }, { "epoch": 0.37655140113964564, "grad_norm": 0.6361973285675049, "learning_rate": 5e-05, "loss": 0.2309, "step": 1206 }, { "epoch": 0.37686363281554913, "grad_norm": 0.6308500170707703, "learning_rate": 5e-05, "loss": 0.2149, "step": 1207 }, { "epoch": 0.3771758644914527, "grad_norm": 0.6551468968391418, "learning_rate": 5e-05, "loss": 0.2233, "step": 1208 }, { "epoch": 0.3774880961673562, "grad_norm": 0.648240864276886, "learning_rate": 5e-05, "loss": 0.2188, "step": 1209 }, { "epoch": 0.3778003278432597, "grad_norm": 0.6460057497024536, "learning_rate": 5e-05, "loss": 0.2262, "step": 1210 }, { "epoch": 0.3781125595191632, "grad_norm": 0.6757068634033203, "learning_rate": 5e-05, "loss": 0.2254, "step": 1211 }, { "epoch": 0.3784247911950667, "grad_norm": 0.6331005692481995, "learning_rate": 5e-05, "loss": 0.2213, "step": 1212 }, { "epoch": 0.3787370228709703, "grad_norm": 0.6981515884399414, "learning_rate": 5e-05, "loss": 0.2437, "step": 1213 }, { "epoch": 0.37904925454687377, "grad_norm": 0.6217281818389893, "learning_rate": 5e-05, "loss": 0.2108, "step": 1214 }, { "epoch": 0.3793614862227773, "grad_norm": 0.6111871004104614, "learning_rate": 5e-05, "loss": 0.2288, "step": 1215 }, { "epoch": 0.3796737178986808, "grad_norm": 0.6117753982543945, "learning_rate": 5e-05, "loss": 0.2043, "step": 1216 }, { "epoch": 0.3799859495745843, "grad_norm": 0.6395655870437622, "learning_rate": 5e-05, "loss": 0.2225, "step": 1217 }, { "epoch": 0.38029818125048787, "grad_norm": 0.700041651725769, "learning_rate": 5e-05, "loss": 0.2134, "step": 1218 }, { "epoch": 0.38061041292639136, "grad_norm": 0.6702867746353149, "learning_rate": 5e-05, "loss": 0.2363, "step": 1219 }, { "epoch": 0.3809226446022949, "grad_norm": 0.6437529921531677, "learning_rate": 5e-05, "loss": 0.2272, "step": 1220 }, { "epoch": 0.3812348762781984, "grad_norm": 0.6552514433860779, "learning_rate": 5e-05, "loss": 0.24, "step": 1221 }, { "epoch": 0.38154710795410196, "grad_norm": 0.6825912594795227, "learning_rate": 5e-05, "loss": 0.2445, "step": 1222 }, { "epoch": 0.38185933963000546, "grad_norm": 0.6498592495918274, "learning_rate": 5e-05, "loss": 0.213, "step": 1223 }, { "epoch": 0.382171571305909, "grad_norm": 0.6977916359901428, "learning_rate": 5e-05, "loss": 0.236, "step": 1224 }, { "epoch": 0.3824838029818125, "grad_norm": 0.6876641511917114, "learning_rate": 5e-05, "loss": 0.2152, "step": 1225 }, { "epoch": 0.382796034657716, "grad_norm": 0.6778894662857056, "learning_rate": 5e-05, "loss": 0.2198, "step": 1226 }, { "epoch": 0.38310826633361955, "grad_norm": 0.6617059111595154, "learning_rate": 5e-05, "loss": 0.2042, "step": 1227 }, { "epoch": 0.38342049800952305, "grad_norm": 0.6592621803283691, "learning_rate": 5e-05, "loss": 0.2245, "step": 1228 }, { "epoch": 0.3837327296854266, "grad_norm": 0.6243909597396851, "learning_rate": 5e-05, "loss": 0.2195, "step": 1229 }, { "epoch": 0.3840449613613301, "grad_norm": 0.6050933599472046, "learning_rate": 5e-05, "loss": 0.2146, "step": 1230 }, { "epoch": 0.38435719303723365, "grad_norm": 0.6187313199043274, "learning_rate": 5e-05, "loss": 0.2142, "step": 1231 }, { "epoch": 0.38466942471313714, "grad_norm": 0.6520898938179016, "learning_rate": 5e-05, "loss": 0.2236, "step": 1232 }, { "epoch": 0.3849816563890407, "grad_norm": 0.6544625759124756, "learning_rate": 5e-05, "loss": 0.22, "step": 1233 }, { "epoch": 0.3852938880649442, "grad_norm": 0.6460723280906677, "learning_rate": 5e-05, "loss": 0.2079, "step": 1234 }, { "epoch": 0.3856061197408477, "grad_norm": 0.6601855754852295, "learning_rate": 5e-05, "loss": 0.2197, "step": 1235 }, { "epoch": 0.38591835141675124, "grad_norm": 0.6284397840499878, "learning_rate": 5e-05, "loss": 0.2291, "step": 1236 }, { "epoch": 0.38623058309265473, "grad_norm": 0.644352912902832, "learning_rate": 5e-05, "loss": 0.2255, "step": 1237 }, { "epoch": 0.3865428147685583, "grad_norm": 0.6682161688804626, "learning_rate": 5e-05, "loss": 0.239, "step": 1238 }, { "epoch": 0.3868550464444618, "grad_norm": 0.6234216690063477, "learning_rate": 5e-05, "loss": 0.2177, "step": 1239 }, { "epoch": 0.38716727812036533, "grad_norm": 0.681407630443573, "learning_rate": 5e-05, "loss": 0.2489, "step": 1240 }, { "epoch": 0.38747950979626883, "grad_norm": 0.6268501281738281, "learning_rate": 5e-05, "loss": 0.2087, "step": 1241 }, { "epoch": 0.3877917414721723, "grad_norm": 0.6806305646896362, "learning_rate": 5e-05, "loss": 0.2283, "step": 1242 }, { "epoch": 0.3881039731480759, "grad_norm": 0.630856454372406, "learning_rate": 5e-05, "loss": 0.1997, "step": 1243 }, { "epoch": 0.38841620482397937, "grad_norm": 0.6769154071807861, "learning_rate": 5e-05, "loss": 0.2304, "step": 1244 }, { "epoch": 0.3887284364998829, "grad_norm": 0.6164240837097168, "learning_rate": 5e-05, "loss": 0.2236, "step": 1245 }, { "epoch": 0.3890406681757864, "grad_norm": 0.6172088384628296, "learning_rate": 5e-05, "loss": 0.2103, "step": 1246 }, { "epoch": 0.38935289985168997, "grad_norm": 0.6521167159080505, "learning_rate": 5e-05, "loss": 0.2078, "step": 1247 }, { "epoch": 0.38966513152759347, "grad_norm": 0.6662593483924866, "learning_rate": 5e-05, "loss": 0.2256, "step": 1248 }, { "epoch": 0.389977363203497, "grad_norm": 0.6389980912208557, "learning_rate": 5e-05, "loss": 0.2107, "step": 1249 }, { "epoch": 0.3902895948794005, "grad_norm": 0.6651821732521057, "learning_rate": 5e-05, "loss": 0.2293, "step": 1250 }, { "epoch": 0.390601826555304, "grad_norm": 0.6879059672355652, "learning_rate": 5e-05, "loss": 0.2328, "step": 1251 }, { "epoch": 0.39091405823120756, "grad_norm": 0.6691357493400574, "learning_rate": 5e-05, "loss": 0.2337, "step": 1252 }, { "epoch": 0.39122628990711106, "grad_norm": 0.6735159158706665, "learning_rate": 5e-05, "loss": 0.2354, "step": 1253 }, { "epoch": 0.3915385215830146, "grad_norm": 0.62106853723526, "learning_rate": 5e-05, "loss": 0.2236, "step": 1254 }, { "epoch": 0.3918507532589181, "grad_norm": 0.6303374171257019, "learning_rate": 5e-05, "loss": 0.2311, "step": 1255 }, { "epoch": 0.39216298493482166, "grad_norm": 0.696164071559906, "learning_rate": 5e-05, "loss": 0.2198, "step": 1256 }, { "epoch": 0.39247521661072515, "grad_norm": 0.6678643226623535, "learning_rate": 5e-05, "loss": 0.2335, "step": 1257 }, { "epoch": 0.3927874482866287, "grad_norm": 0.6078442335128784, "learning_rate": 5e-05, "loss": 0.2095, "step": 1258 }, { "epoch": 0.3930996799625322, "grad_norm": 0.6786478161811829, "learning_rate": 5e-05, "loss": 0.2429, "step": 1259 }, { "epoch": 0.3934119116384357, "grad_norm": 0.6784607768058777, "learning_rate": 5e-05, "loss": 0.2492, "step": 1260 }, { "epoch": 0.39372414331433925, "grad_norm": 0.6739345788955688, "learning_rate": 5e-05, "loss": 0.2521, "step": 1261 }, { "epoch": 0.39403637499024274, "grad_norm": 0.6484004259109497, "learning_rate": 5e-05, "loss": 0.2316, "step": 1262 }, { "epoch": 0.3943486066661463, "grad_norm": 0.6887271404266357, "learning_rate": 5e-05, "loss": 0.2312, "step": 1263 }, { "epoch": 0.3946608383420498, "grad_norm": 0.691074550151825, "learning_rate": 5e-05, "loss": 0.2345, "step": 1264 }, { "epoch": 0.39497307001795334, "grad_norm": 0.6378345489501953, "learning_rate": 5e-05, "loss": 0.2075, "step": 1265 }, { "epoch": 0.39528530169385684, "grad_norm": 0.6551834940910339, "learning_rate": 5e-05, "loss": 0.2268, "step": 1266 }, { "epoch": 0.39559753336976033, "grad_norm": 0.6654781699180603, "learning_rate": 5e-05, "loss": 0.2341, "step": 1267 }, { "epoch": 0.3959097650456639, "grad_norm": 0.6300109624862671, "learning_rate": 5e-05, "loss": 0.2217, "step": 1268 }, { "epoch": 0.3962219967215674, "grad_norm": 0.6462953686714172, "learning_rate": 5e-05, "loss": 0.2159, "step": 1269 }, { "epoch": 0.39653422839747093, "grad_norm": 0.623807430267334, "learning_rate": 5e-05, "loss": 0.2051, "step": 1270 }, { "epoch": 0.39684646007337443, "grad_norm": 0.640749454498291, "learning_rate": 5e-05, "loss": 0.2223, "step": 1271 }, { "epoch": 0.397158691749278, "grad_norm": 0.6195659637451172, "learning_rate": 5e-05, "loss": 0.2227, "step": 1272 }, { "epoch": 0.3974709234251815, "grad_norm": 0.6755836606025696, "learning_rate": 5e-05, "loss": 0.2255, "step": 1273 }, { "epoch": 0.39778315510108503, "grad_norm": 0.6324487924575806, "learning_rate": 5e-05, "loss": 0.2232, "step": 1274 }, { "epoch": 0.3980953867769885, "grad_norm": 0.6364821791648865, "learning_rate": 5e-05, "loss": 0.2203, "step": 1275 }, { "epoch": 0.398407618452892, "grad_norm": 0.6246048212051392, "learning_rate": 5e-05, "loss": 0.2253, "step": 1276 }, { "epoch": 0.39871985012879557, "grad_norm": 0.6182380318641663, "learning_rate": 5e-05, "loss": 0.2099, "step": 1277 }, { "epoch": 0.39903208180469907, "grad_norm": 0.661795973777771, "learning_rate": 5e-05, "loss": 0.2097, "step": 1278 }, { "epoch": 0.3993443134806026, "grad_norm": 0.6273604035377502, "learning_rate": 5e-05, "loss": 0.2265, "step": 1279 }, { "epoch": 0.3996565451565061, "grad_norm": 0.6219443678855896, "learning_rate": 5e-05, "loss": 0.2223, "step": 1280 }, { "epoch": 0.39996877683240967, "grad_norm": 0.6207586526870728, "learning_rate": 5e-05, "loss": 0.1997, "step": 1281 }, { "epoch": 0.40028100850831316, "grad_norm": 0.6568509340286255, "learning_rate": 5e-05, "loss": 0.2057, "step": 1282 }, { "epoch": 0.4005932401842167, "grad_norm": 0.6428727507591248, "learning_rate": 5e-05, "loss": 0.2338, "step": 1283 }, { "epoch": 0.4009054718601202, "grad_norm": 0.6474907398223877, "learning_rate": 5e-05, "loss": 0.2086, "step": 1284 }, { "epoch": 0.4012177035360237, "grad_norm": 0.574100136756897, "learning_rate": 5e-05, "loss": 0.1924, "step": 1285 }, { "epoch": 0.40152993521192726, "grad_norm": 0.6658748984336853, "learning_rate": 5e-05, "loss": 0.2307, "step": 1286 }, { "epoch": 0.40184216688783075, "grad_norm": 0.6467745304107666, "learning_rate": 5e-05, "loss": 0.2198, "step": 1287 }, { "epoch": 0.4021543985637343, "grad_norm": 0.6880146265029907, "learning_rate": 5e-05, "loss": 0.228, "step": 1288 }, { "epoch": 0.4024666302396378, "grad_norm": 0.6421787142753601, "learning_rate": 5e-05, "loss": 0.2193, "step": 1289 }, { "epoch": 0.40277886191554135, "grad_norm": 0.6525007486343384, "learning_rate": 5e-05, "loss": 0.2325, "step": 1290 }, { "epoch": 0.40309109359144485, "grad_norm": 0.6655361652374268, "learning_rate": 5e-05, "loss": 0.239, "step": 1291 }, { "epoch": 0.40340332526734834, "grad_norm": 0.6345336437225342, "learning_rate": 5e-05, "loss": 0.224, "step": 1292 }, { "epoch": 0.4037155569432519, "grad_norm": 0.6376466155052185, "learning_rate": 5e-05, "loss": 0.2189, "step": 1293 }, { "epoch": 0.4040277886191554, "grad_norm": 0.6531116366386414, "learning_rate": 5e-05, "loss": 0.2265, "step": 1294 }, { "epoch": 0.40434002029505894, "grad_norm": 0.6560792922973633, "learning_rate": 5e-05, "loss": 0.216, "step": 1295 }, { "epoch": 0.40465225197096244, "grad_norm": 0.6195251941680908, "learning_rate": 5e-05, "loss": 0.2173, "step": 1296 }, { "epoch": 0.404964483646866, "grad_norm": 0.6419579386711121, "learning_rate": 5e-05, "loss": 0.2272, "step": 1297 }, { "epoch": 0.4052767153227695, "grad_norm": 0.673290491104126, "learning_rate": 5e-05, "loss": 0.2492, "step": 1298 }, { "epoch": 0.40558894699867304, "grad_norm": 0.6532256007194519, "learning_rate": 5e-05, "loss": 0.2438, "step": 1299 }, { "epoch": 0.40590117867457653, "grad_norm": 0.6227350831031799, "learning_rate": 5e-05, "loss": 0.2101, "step": 1300 }, { "epoch": 0.40621341035048003, "grad_norm": 0.6378641724586487, "learning_rate": 5e-05, "loss": 0.2263, "step": 1301 }, { "epoch": 0.4065256420263836, "grad_norm": 0.6483761072158813, "learning_rate": 5e-05, "loss": 0.2262, "step": 1302 }, { "epoch": 0.4068378737022871, "grad_norm": 0.6263547539710999, "learning_rate": 5e-05, "loss": 0.2207, "step": 1303 }, { "epoch": 0.40715010537819063, "grad_norm": 0.679017186164856, "learning_rate": 5e-05, "loss": 0.2378, "step": 1304 }, { "epoch": 0.4074623370540941, "grad_norm": 0.6299603581428528, "learning_rate": 5e-05, "loss": 0.2145, "step": 1305 }, { "epoch": 0.4077745687299977, "grad_norm": 0.642493486404419, "learning_rate": 5e-05, "loss": 0.2208, "step": 1306 }, { "epoch": 0.4080868004059012, "grad_norm": 0.6523114442825317, "learning_rate": 5e-05, "loss": 0.2239, "step": 1307 }, { "epoch": 0.4083990320818047, "grad_norm": 0.6368314027786255, "learning_rate": 5e-05, "loss": 0.2353, "step": 1308 }, { "epoch": 0.4087112637577082, "grad_norm": 0.6655653119087219, "learning_rate": 5e-05, "loss": 0.2357, "step": 1309 }, { "epoch": 0.4090234954336117, "grad_norm": 0.662837028503418, "learning_rate": 5e-05, "loss": 0.2161, "step": 1310 }, { "epoch": 0.40933572710951527, "grad_norm": 0.6319391131401062, "learning_rate": 5e-05, "loss": 0.2221, "step": 1311 }, { "epoch": 0.40964795878541876, "grad_norm": 0.6341718435287476, "learning_rate": 5e-05, "loss": 0.211, "step": 1312 }, { "epoch": 0.4099601904613223, "grad_norm": 0.6636045575141907, "learning_rate": 5e-05, "loss": 0.2383, "step": 1313 }, { "epoch": 0.4102724221372258, "grad_norm": 0.6255361437797546, "learning_rate": 5e-05, "loss": 0.229, "step": 1314 }, { "epoch": 0.41058465381312936, "grad_norm": 0.6047961711883545, "learning_rate": 5e-05, "loss": 0.2011, "step": 1315 }, { "epoch": 0.41089688548903286, "grad_norm": 0.6219202876091003, "learning_rate": 5e-05, "loss": 0.2118, "step": 1316 }, { "epoch": 0.41120911716493636, "grad_norm": 0.6608145833015442, "learning_rate": 5e-05, "loss": 0.2237, "step": 1317 }, { "epoch": 0.4115213488408399, "grad_norm": 0.6505046486854553, "learning_rate": 5e-05, "loss": 0.2348, "step": 1318 }, { "epoch": 0.4118335805167434, "grad_norm": 0.6211758255958557, "learning_rate": 5e-05, "loss": 0.2232, "step": 1319 }, { "epoch": 0.41214581219264695, "grad_norm": 0.6564857959747314, "learning_rate": 5e-05, "loss": 0.2309, "step": 1320 }, { "epoch": 0.41245804386855045, "grad_norm": 0.6978774070739746, "learning_rate": 5e-05, "loss": 0.2477, "step": 1321 }, { "epoch": 0.412770275544454, "grad_norm": 0.6460022926330566, "learning_rate": 5e-05, "loss": 0.2236, "step": 1322 }, { "epoch": 0.4130825072203575, "grad_norm": 0.6760416030883789, "learning_rate": 5e-05, "loss": 0.2278, "step": 1323 }, { "epoch": 0.41339473889626105, "grad_norm": 0.7020403742790222, "learning_rate": 5e-05, "loss": 0.2338, "step": 1324 }, { "epoch": 0.41370697057216455, "grad_norm": 0.6544322967529297, "learning_rate": 5e-05, "loss": 0.2216, "step": 1325 }, { "epoch": 0.41401920224806804, "grad_norm": 0.6608328819274902, "learning_rate": 5e-05, "loss": 0.2267, "step": 1326 }, { "epoch": 0.4143314339239716, "grad_norm": 0.6309044361114502, "learning_rate": 5e-05, "loss": 0.2111, "step": 1327 }, { "epoch": 0.4146436655998751, "grad_norm": 0.606290340423584, "learning_rate": 5e-05, "loss": 0.2152, "step": 1328 }, { "epoch": 0.41495589727577864, "grad_norm": 0.6100196242332458, "learning_rate": 5e-05, "loss": 0.1951, "step": 1329 }, { "epoch": 0.41526812895168214, "grad_norm": 0.6367553472518921, "learning_rate": 5e-05, "loss": 0.2338, "step": 1330 }, { "epoch": 0.4155803606275857, "grad_norm": 0.6265515089035034, "learning_rate": 5e-05, "loss": 0.2272, "step": 1331 }, { "epoch": 0.4158925923034892, "grad_norm": 0.6272140145301819, "learning_rate": 5e-05, "loss": 0.2024, "step": 1332 }, { "epoch": 0.41620482397939274, "grad_norm": 0.7053614258766174, "learning_rate": 5e-05, "loss": 0.2414, "step": 1333 }, { "epoch": 0.41651705565529623, "grad_norm": 0.6259044408798218, "learning_rate": 5e-05, "loss": 0.2201, "step": 1334 }, { "epoch": 0.4168292873311997, "grad_norm": 0.6602820754051208, "learning_rate": 5e-05, "loss": 0.2462, "step": 1335 }, { "epoch": 0.4171415190071033, "grad_norm": 0.6350888609886169, "learning_rate": 5e-05, "loss": 0.2274, "step": 1336 }, { "epoch": 0.4174537506830068, "grad_norm": 0.6508819460868835, "learning_rate": 5e-05, "loss": 0.2259, "step": 1337 }, { "epoch": 0.4177659823589103, "grad_norm": 0.6586160063743591, "learning_rate": 5e-05, "loss": 0.2265, "step": 1338 }, { "epoch": 0.4180782140348138, "grad_norm": 0.6016979217529297, "learning_rate": 5e-05, "loss": 0.2029, "step": 1339 }, { "epoch": 0.4183904457107174, "grad_norm": 0.6451661586761475, "learning_rate": 5e-05, "loss": 0.2113, "step": 1340 }, { "epoch": 0.41870267738662087, "grad_norm": 0.6041579246520996, "learning_rate": 5e-05, "loss": 0.2149, "step": 1341 }, { "epoch": 0.41901490906252437, "grad_norm": 0.6412055492401123, "learning_rate": 5e-05, "loss": 0.2286, "step": 1342 }, { "epoch": 0.4193271407384279, "grad_norm": 0.6110237836837769, "learning_rate": 5e-05, "loss": 0.2176, "step": 1343 }, { "epoch": 0.4196393724143314, "grad_norm": 0.638297438621521, "learning_rate": 5e-05, "loss": 0.2302, "step": 1344 }, { "epoch": 0.41995160409023496, "grad_norm": 0.6698467135429382, "learning_rate": 5e-05, "loss": 0.2425, "step": 1345 }, { "epoch": 0.42026383576613846, "grad_norm": 0.6120096445083618, "learning_rate": 5e-05, "loss": 0.211, "step": 1346 }, { "epoch": 0.420576067442042, "grad_norm": 0.7014675736427307, "learning_rate": 5e-05, "loss": 0.235, "step": 1347 }, { "epoch": 0.4208882991179455, "grad_norm": 0.6308057904243469, "learning_rate": 5e-05, "loss": 0.2155, "step": 1348 }, { "epoch": 0.42120053079384906, "grad_norm": 0.6519820094108582, "learning_rate": 5e-05, "loss": 0.232, "step": 1349 }, { "epoch": 0.42151276246975256, "grad_norm": 0.6387293338775635, "learning_rate": 5e-05, "loss": 0.2064, "step": 1350 }, { "epoch": 0.42182499414565605, "grad_norm": 0.6434894800186157, "learning_rate": 5e-05, "loss": 0.2295, "step": 1351 }, { "epoch": 0.4221372258215596, "grad_norm": 0.6291279196739197, "learning_rate": 5e-05, "loss": 0.2298, "step": 1352 }, { "epoch": 0.4224494574974631, "grad_norm": 0.6456338763237, "learning_rate": 5e-05, "loss": 0.2161, "step": 1353 }, { "epoch": 0.42276168917336665, "grad_norm": 0.6233469247817993, "learning_rate": 5e-05, "loss": 0.2292, "step": 1354 }, { "epoch": 0.42307392084927015, "grad_norm": 0.6552309989929199, "learning_rate": 5e-05, "loss": 0.2418, "step": 1355 }, { "epoch": 0.4233861525251737, "grad_norm": 0.655258059501648, "learning_rate": 5e-05, "loss": 0.2211, "step": 1356 }, { "epoch": 0.4236983842010772, "grad_norm": 0.6295210719108582, "learning_rate": 5e-05, "loss": 0.2244, "step": 1357 }, { "epoch": 0.42401061587698075, "grad_norm": 0.6290527582168579, "learning_rate": 5e-05, "loss": 0.2242, "step": 1358 }, { "epoch": 0.42432284755288424, "grad_norm": 0.6334994435310364, "learning_rate": 5e-05, "loss": 0.2275, "step": 1359 }, { "epoch": 0.42463507922878774, "grad_norm": 0.6458233594894409, "learning_rate": 5e-05, "loss": 0.2332, "step": 1360 }, { "epoch": 0.4249473109046913, "grad_norm": 0.6081159710884094, "learning_rate": 5e-05, "loss": 0.208, "step": 1361 }, { "epoch": 0.4252595425805948, "grad_norm": 0.6449544429779053, "learning_rate": 5e-05, "loss": 0.2265, "step": 1362 }, { "epoch": 0.42557177425649834, "grad_norm": 0.6251835227012634, "learning_rate": 5e-05, "loss": 0.2209, "step": 1363 }, { "epoch": 0.42588400593240183, "grad_norm": 0.6938860416412354, "learning_rate": 5e-05, "loss": 0.221, "step": 1364 }, { "epoch": 0.4261962376083054, "grad_norm": 0.6508045792579651, "learning_rate": 5e-05, "loss": 0.2163, "step": 1365 }, { "epoch": 0.4265084692842089, "grad_norm": 0.6408339738845825, "learning_rate": 5e-05, "loss": 0.2242, "step": 1366 }, { "epoch": 0.4268207009601124, "grad_norm": 0.6169499754905701, "learning_rate": 5e-05, "loss": 0.2128, "step": 1367 }, { "epoch": 0.4271329326360159, "grad_norm": 0.6122943758964539, "learning_rate": 5e-05, "loss": 0.2111, "step": 1368 }, { "epoch": 0.4274451643119194, "grad_norm": 0.6419656872749329, "learning_rate": 5e-05, "loss": 0.2203, "step": 1369 }, { "epoch": 0.427757395987823, "grad_norm": 0.6475896239280701, "learning_rate": 5e-05, "loss": 0.2269, "step": 1370 }, { "epoch": 0.42806962766372647, "grad_norm": 0.6004589796066284, "learning_rate": 5e-05, "loss": 0.2146, "step": 1371 }, { "epoch": 0.42838185933963, "grad_norm": 0.6301820874214172, "learning_rate": 5e-05, "loss": 0.2184, "step": 1372 }, { "epoch": 0.4286940910155335, "grad_norm": 0.6480997800827026, "learning_rate": 5e-05, "loss": 0.2296, "step": 1373 }, { "epoch": 0.42900632269143707, "grad_norm": 0.638924241065979, "learning_rate": 5e-05, "loss": 0.2234, "step": 1374 }, { "epoch": 0.42931855436734057, "grad_norm": 0.6534156203269958, "learning_rate": 5e-05, "loss": 0.2214, "step": 1375 }, { "epoch": 0.42963078604324406, "grad_norm": 0.6807188987731934, "learning_rate": 5e-05, "loss": 0.2337, "step": 1376 }, { "epoch": 0.4299430177191476, "grad_norm": 0.654633641242981, "learning_rate": 5e-05, "loss": 0.2148, "step": 1377 }, { "epoch": 0.4302552493950511, "grad_norm": 0.657552182674408, "learning_rate": 5e-05, "loss": 0.2186, "step": 1378 }, { "epoch": 0.43056748107095466, "grad_norm": 0.680941641330719, "learning_rate": 5e-05, "loss": 0.2368, "step": 1379 }, { "epoch": 0.43087971274685816, "grad_norm": 0.6526975035667419, "learning_rate": 5e-05, "loss": 0.2288, "step": 1380 }, { "epoch": 0.4311919444227617, "grad_norm": 0.6493653655052185, "learning_rate": 5e-05, "loss": 0.2292, "step": 1381 }, { "epoch": 0.4315041760986652, "grad_norm": 0.6718555688858032, "learning_rate": 5e-05, "loss": 0.2415, "step": 1382 }, { "epoch": 0.43181640777456876, "grad_norm": 0.6094968914985657, "learning_rate": 5e-05, "loss": 0.2206, "step": 1383 }, { "epoch": 0.43212863945047225, "grad_norm": 0.6030204892158508, "learning_rate": 5e-05, "loss": 0.1984, "step": 1384 }, { "epoch": 0.43244087112637575, "grad_norm": 0.650854766368866, "learning_rate": 5e-05, "loss": 0.2373, "step": 1385 }, { "epoch": 0.4327531028022793, "grad_norm": 0.6123471260070801, "learning_rate": 5e-05, "loss": 0.2167, "step": 1386 }, { "epoch": 0.4330653344781828, "grad_norm": 0.6058046221733093, "learning_rate": 5e-05, "loss": 0.2113, "step": 1387 }, { "epoch": 0.43337756615408635, "grad_norm": 0.5989846587181091, "learning_rate": 5e-05, "loss": 0.2035, "step": 1388 }, { "epoch": 0.43368979782998984, "grad_norm": 0.6257761716842651, "learning_rate": 5e-05, "loss": 0.2138, "step": 1389 }, { "epoch": 0.4340020295058934, "grad_norm": 0.6079849004745483, "learning_rate": 5e-05, "loss": 0.2187, "step": 1390 }, { "epoch": 0.4343142611817969, "grad_norm": 0.6094767451286316, "learning_rate": 5e-05, "loss": 0.2028, "step": 1391 }, { "epoch": 0.4346264928577004, "grad_norm": 0.6071872115135193, "learning_rate": 5e-05, "loss": 0.2151, "step": 1392 }, { "epoch": 0.43493872453360394, "grad_norm": 0.6894599795341492, "learning_rate": 5e-05, "loss": 0.2534, "step": 1393 }, { "epoch": 0.43525095620950743, "grad_norm": 0.614250659942627, "learning_rate": 5e-05, "loss": 0.2014, "step": 1394 }, { "epoch": 0.435563187885411, "grad_norm": 0.6282212734222412, "learning_rate": 5e-05, "loss": 0.2026, "step": 1395 }, { "epoch": 0.4358754195613145, "grad_norm": 0.6369844675064087, "learning_rate": 5e-05, "loss": 0.2228, "step": 1396 }, { "epoch": 0.43618765123721803, "grad_norm": 0.631945788860321, "learning_rate": 5e-05, "loss": 0.2225, "step": 1397 }, { "epoch": 0.43649988291312153, "grad_norm": 0.6081124544143677, "learning_rate": 5e-05, "loss": 0.2141, "step": 1398 }, { "epoch": 0.4368121145890251, "grad_norm": 0.6355950832366943, "learning_rate": 5e-05, "loss": 0.2363, "step": 1399 }, { "epoch": 0.4371243462649286, "grad_norm": 0.6046746373176575, "learning_rate": 5e-05, "loss": 0.2107, "step": 1400 }, { "epoch": 0.43743657794083207, "grad_norm": 0.6219913363456726, "learning_rate": 5e-05, "loss": 0.217, "step": 1401 }, { "epoch": 0.4377488096167356, "grad_norm": 0.6361287832260132, "learning_rate": 5e-05, "loss": 0.2118, "step": 1402 }, { "epoch": 0.4380610412926391, "grad_norm": 0.6515527367591858, "learning_rate": 5e-05, "loss": 0.2319, "step": 1403 }, { "epoch": 0.43837327296854267, "grad_norm": 0.6497931480407715, "learning_rate": 5e-05, "loss": 0.2455, "step": 1404 }, { "epoch": 0.43868550464444617, "grad_norm": 0.6445742845535278, "learning_rate": 5e-05, "loss": 0.2007, "step": 1405 }, { "epoch": 0.4389977363203497, "grad_norm": 0.6420057415962219, "learning_rate": 5e-05, "loss": 0.2215, "step": 1406 }, { "epoch": 0.4393099679962532, "grad_norm": 0.6009349226951599, "learning_rate": 5e-05, "loss": 0.2114, "step": 1407 }, { "epoch": 0.43962219967215677, "grad_norm": 0.636425256729126, "learning_rate": 5e-05, "loss": 0.2184, "step": 1408 }, { "epoch": 0.43993443134806026, "grad_norm": 0.6369946599006653, "learning_rate": 5e-05, "loss": 0.2203, "step": 1409 }, { "epoch": 0.44024666302396376, "grad_norm": 0.6191074252128601, "learning_rate": 5e-05, "loss": 0.2221, "step": 1410 }, { "epoch": 0.4405588946998673, "grad_norm": 0.6251648664474487, "learning_rate": 5e-05, "loss": 0.2163, "step": 1411 }, { "epoch": 0.4408711263757708, "grad_norm": 0.6075226664543152, "learning_rate": 5e-05, "loss": 0.1969, "step": 1412 }, { "epoch": 0.44118335805167436, "grad_norm": 0.6690216064453125, "learning_rate": 5e-05, "loss": 0.2347, "step": 1413 }, { "epoch": 0.44149558972757785, "grad_norm": 0.6330466270446777, "learning_rate": 5e-05, "loss": 0.2247, "step": 1414 }, { "epoch": 0.4418078214034814, "grad_norm": 0.6515161395072937, "learning_rate": 5e-05, "loss": 0.2311, "step": 1415 }, { "epoch": 0.4421200530793849, "grad_norm": 0.6484932899475098, "learning_rate": 5e-05, "loss": 0.2313, "step": 1416 }, { "epoch": 0.4424322847552884, "grad_norm": 0.6231474876403809, "learning_rate": 5e-05, "loss": 0.2058, "step": 1417 }, { "epoch": 0.44274451643119195, "grad_norm": 0.6693360805511475, "learning_rate": 5e-05, "loss": 0.2249, "step": 1418 }, { "epoch": 0.44305674810709544, "grad_norm": 0.6467017531394958, "learning_rate": 5e-05, "loss": 0.2171, "step": 1419 }, { "epoch": 0.443368979782999, "grad_norm": 0.6589299440383911, "learning_rate": 5e-05, "loss": 0.2325, "step": 1420 }, { "epoch": 0.4436812114589025, "grad_norm": 0.6492156386375427, "learning_rate": 5e-05, "loss": 0.2392, "step": 1421 }, { "epoch": 0.44399344313480604, "grad_norm": 0.6693766713142395, "learning_rate": 5e-05, "loss": 0.2148, "step": 1422 }, { "epoch": 0.44430567481070954, "grad_norm": 0.6376355886459351, "learning_rate": 5e-05, "loss": 0.2108, "step": 1423 }, { "epoch": 0.4446179064866131, "grad_norm": 0.6528264284133911, "learning_rate": 5e-05, "loss": 0.224, "step": 1424 }, { "epoch": 0.4449301381625166, "grad_norm": 0.668964684009552, "learning_rate": 5e-05, "loss": 0.2377, "step": 1425 }, { "epoch": 0.4452423698384201, "grad_norm": 0.6348416209220886, "learning_rate": 5e-05, "loss": 0.2039, "step": 1426 }, { "epoch": 0.44555460151432363, "grad_norm": 0.617657482624054, "learning_rate": 5e-05, "loss": 0.1969, "step": 1427 }, { "epoch": 0.44586683319022713, "grad_norm": 0.6213597059249878, "learning_rate": 5e-05, "loss": 0.2329, "step": 1428 }, { "epoch": 0.4461790648661307, "grad_norm": 0.6636458039283752, "learning_rate": 5e-05, "loss": 0.2067, "step": 1429 }, { "epoch": 0.4464912965420342, "grad_norm": 0.6539012789726257, "learning_rate": 5e-05, "loss": 0.2315, "step": 1430 }, { "epoch": 0.44680352821793773, "grad_norm": 0.6769692301750183, "learning_rate": 5e-05, "loss": 0.2416, "step": 1431 }, { "epoch": 0.4471157598938412, "grad_norm": 0.633386492729187, "learning_rate": 5e-05, "loss": 0.2199, "step": 1432 }, { "epoch": 0.4474279915697448, "grad_norm": 0.6139927506446838, "learning_rate": 5e-05, "loss": 0.209, "step": 1433 }, { "epoch": 0.44774022324564827, "grad_norm": 0.6294575333595276, "learning_rate": 5e-05, "loss": 0.2259, "step": 1434 }, { "epoch": 0.44805245492155177, "grad_norm": 0.6120661497116089, "learning_rate": 5e-05, "loss": 0.2071, "step": 1435 }, { "epoch": 0.4483646865974553, "grad_norm": 0.6379923224449158, "learning_rate": 5e-05, "loss": 0.2173, "step": 1436 }, { "epoch": 0.4486769182733588, "grad_norm": 0.6585249900817871, "learning_rate": 5e-05, "loss": 0.2362, "step": 1437 }, { "epoch": 0.44898914994926237, "grad_norm": 0.6554802060127258, "learning_rate": 5e-05, "loss": 0.2454, "step": 1438 }, { "epoch": 0.44930138162516586, "grad_norm": 0.6525847911834717, "learning_rate": 5e-05, "loss": 0.2314, "step": 1439 }, { "epoch": 0.4496136133010694, "grad_norm": 0.6382611393928528, "learning_rate": 5e-05, "loss": 0.2216, "step": 1440 }, { "epoch": 0.4499258449769729, "grad_norm": 0.6213090419769287, "learning_rate": 5e-05, "loss": 0.222, "step": 1441 }, { "epoch": 0.4502380766528764, "grad_norm": 0.6516009569168091, "learning_rate": 5e-05, "loss": 0.234, "step": 1442 }, { "epoch": 0.45055030832877996, "grad_norm": 0.6188258528709412, "learning_rate": 5e-05, "loss": 0.2055, "step": 1443 }, { "epoch": 0.45086254000468345, "grad_norm": 0.6447911262512207, "learning_rate": 5e-05, "loss": 0.2296, "step": 1444 }, { "epoch": 0.451174771680587, "grad_norm": 0.6131879687309265, "learning_rate": 5e-05, "loss": 0.2022, "step": 1445 }, { "epoch": 0.4514870033564905, "grad_norm": 0.6388572454452515, "learning_rate": 5e-05, "loss": 0.2226, "step": 1446 }, { "epoch": 0.45179923503239405, "grad_norm": 0.600852906703949, "learning_rate": 5e-05, "loss": 0.2059, "step": 1447 }, { "epoch": 0.45211146670829755, "grad_norm": 0.6690648198127747, "learning_rate": 5e-05, "loss": 0.2262, "step": 1448 }, { "epoch": 0.4524236983842011, "grad_norm": 0.6621968150138855, "learning_rate": 5e-05, "loss": 0.2189, "step": 1449 }, { "epoch": 0.4527359300601046, "grad_norm": 0.6478545069694519, "learning_rate": 5e-05, "loss": 0.2245, "step": 1450 }, { "epoch": 0.4530481617360081, "grad_norm": 0.6355212926864624, "learning_rate": 5e-05, "loss": 0.2275, "step": 1451 }, { "epoch": 0.45336039341191164, "grad_norm": 0.5926360487937927, "learning_rate": 5e-05, "loss": 0.2018, "step": 1452 }, { "epoch": 0.45367262508781514, "grad_norm": 0.6780962347984314, "learning_rate": 5e-05, "loss": 0.2324, "step": 1453 }, { "epoch": 0.4539848567637187, "grad_norm": 0.622908890247345, "learning_rate": 5e-05, "loss": 0.2185, "step": 1454 }, { "epoch": 0.4542970884396222, "grad_norm": 0.6411144137382507, "learning_rate": 5e-05, "loss": 0.2184, "step": 1455 }, { "epoch": 0.45460932011552574, "grad_norm": 0.6268255114555359, "learning_rate": 5e-05, "loss": 0.2202, "step": 1456 }, { "epoch": 0.45492155179142924, "grad_norm": 0.6147963404655457, "learning_rate": 5e-05, "loss": 0.2221, "step": 1457 }, { "epoch": 0.4552337834673328, "grad_norm": 0.6601042151451111, "learning_rate": 5e-05, "loss": 0.2272, "step": 1458 }, { "epoch": 0.4555460151432363, "grad_norm": 0.6658827662467957, "learning_rate": 5e-05, "loss": 0.2215, "step": 1459 }, { "epoch": 0.4558582468191398, "grad_norm": 0.6481369137763977, "learning_rate": 5e-05, "loss": 0.2364, "step": 1460 }, { "epoch": 0.45617047849504333, "grad_norm": 0.6452566385269165, "learning_rate": 5e-05, "loss": 0.231, "step": 1461 }, { "epoch": 0.4564827101709468, "grad_norm": 0.6507308483123779, "learning_rate": 5e-05, "loss": 0.2232, "step": 1462 }, { "epoch": 0.4567949418468504, "grad_norm": 0.6269664168357849, "learning_rate": 5e-05, "loss": 0.2226, "step": 1463 }, { "epoch": 0.4571071735227539, "grad_norm": 0.6580398082733154, "learning_rate": 5e-05, "loss": 0.2322, "step": 1464 }, { "epoch": 0.4574194051986574, "grad_norm": 0.5958406925201416, "learning_rate": 5e-05, "loss": 0.2136, "step": 1465 }, { "epoch": 0.4577316368745609, "grad_norm": 0.657634437084198, "learning_rate": 5e-05, "loss": 0.244, "step": 1466 }, { "epoch": 0.4580438685504644, "grad_norm": 0.6434822678565979, "learning_rate": 5e-05, "loss": 0.223, "step": 1467 }, { "epoch": 0.45835610022636797, "grad_norm": 0.6145699620246887, "learning_rate": 5e-05, "loss": 0.2034, "step": 1468 }, { "epoch": 0.45866833190227146, "grad_norm": 0.6167147755622864, "learning_rate": 5e-05, "loss": 0.2126, "step": 1469 }, { "epoch": 0.458980563578175, "grad_norm": 0.666530966758728, "learning_rate": 5e-05, "loss": 0.2338, "step": 1470 }, { "epoch": 0.4592927952540785, "grad_norm": 0.6634217500686646, "learning_rate": 5e-05, "loss": 0.2252, "step": 1471 }, { "epoch": 0.45960502692998206, "grad_norm": 0.6203908920288086, "learning_rate": 5e-05, "loss": 0.2117, "step": 1472 }, { "epoch": 0.45991725860588556, "grad_norm": 0.6643890738487244, "learning_rate": 5e-05, "loss": 0.2344, "step": 1473 }, { "epoch": 0.4602294902817891, "grad_norm": 0.6824442744255066, "learning_rate": 5e-05, "loss": 0.2186, "step": 1474 }, { "epoch": 0.4605417219576926, "grad_norm": 0.6717156171798706, "learning_rate": 5e-05, "loss": 0.217, "step": 1475 }, { "epoch": 0.4608539536335961, "grad_norm": 0.6381433606147766, "learning_rate": 5e-05, "loss": 0.2231, "step": 1476 }, { "epoch": 0.46116618530949965, "grad_norm": 0.6716656684875488, "learning_rate": 5e-05, "loss": 0.2328, "step": 1477 }, { "epoch": 0.46147841698540315, "grad_norm": 0.6408844590187073, "learning_rate": 5e-05, "loss": 0.2338, "step": 1478 }, { "epoch": 0.4617906486613067, "grad_norm": 0.5794318914413452, "learning_rate": 5e-05, "loss": 0.2111, "step": 1479 }, { "epoch": 0.4621028803372102, "grad_norm": 0.6233298182487488, "learning_rate": 5e-05, "loss": 0.2219, "step": 1480 }, { "epoch": 0.46241511201311375, "grad_norm": 0.6107158064842224, "learning_rate": 5e-05, "loss": 0.2211, "step": 1481 }, { "epoch": 0.46272734368901725, "grad_norm": 0.6046615839004517, "learning_rate": 5e-05, "loss": 0.2176, "step": 1482 }, { "epoch": 0.4630395753649208, "grad_norm": 0.6329900026321411, "learning_rate": 5e-05, "loss": 0.2239, "step": 1483 }, { "epoch": 0.4633518070408243, "grad_norm": 0.6458438634872437, "learning_rate": 5e-05, "loss": 0.2204, "step": 1484 }, { "epoch": 0.4636640387167278, "grad_norm": 0.663244903087616, "learning_rate": 5e-05, "loss": 0.2095, "step": 1485 }, { "epoch": 0.46397627039263134, "grad_norm": 0.6184166073799133, "learning_rate": 5e-05, "loss": 0.2103, "step": 1486 }, { "epoch": 0.46428850206853484, "grad_norm": 0.6321340203285217, "learning_rate": 5e-05, "loss": 0.2211, "step": 1487 }, { "epoch": 0.4646007337444384, "grad_norm": 0.6010296940803528, "learning_rate": 5e-05, "loss": 0.2074, "step": 1488 }, { "epoch": 0.4649129654203419, "grad_norm": 0.6546451449394226, "learning_rate": 5e-05, "loss": 0.2169, "step": 1489 }, { "epoch": 0.46522519709624544, "grad_norm": 0.6623850464820862, "learning_rate": 5e-05, "loss": 0.2295, "step": 1490 }, { "epoch": 0.46553742877214893, "grad_norm": 0.620644211769104, "learning_rate": 5e-05, "loss": 0.2165, "step": 1491 }, { "epoch": 0.4658496604480524, "grad_norm": 0.6194939613342285, "learning_rate": 5e-05, "loss": 0.2124, "step": 1492 }, { "epoch": 0.466161892123956, "grad_norm": 0.6336272358894348, "learning_rate": 5e-05, "loss": 0.2272, "step": 1493 }, { "epoch": 0.4664741237998595, "grad_norm": 0.6511487364768982, "learning_rate": 5e-05, "loss": 0.2201, "step": 1494 }, { "epoch": 0.466786355475763, "grad_norm": 0.6246220469474792, "learning_rate": 5e-05, "loss": 0.2224, "step": 1495 }, { "epoch": 0.4670985871516665, "grad_norm": 0.6348879933357239, "learning_rate": 5e-05, "loss": 0.2053, "step": 1496 }, { "epoch": 0.4674108188275701, "grad_norm": 0.621854305267334, "learning_rate": 5e-05, "loss": 0.2181, "step": 1497 }, { "epoch": 0.46772305050347357, "grad_norm": 0.6444857716560364, "learning_rate": 5e-05, "loss": 0.2167, "step": 1498 }, { "epoch": 0.4680352821793771, "grad_norm": 0.6220207214355469, "learning_rate": 5e-05, "loss": 0.2284, "step": 1499 }, { "epoch": 0.4683475138552806, "grad_norm": 0.6273908019065857, "learning_rate": 5e-05, "loss": 0.2132, "step": 1500 }, { "epoch": 0.4686597455311841, "grad_norm": 0.5959233641624451, "learning_rate": 5e-05, "loss": 0.2015, "step": 1501 }, { "epoch": 0.46897197720708766, "grad_norm": 0.6226284503936768, "learning_rate": 5e-05, "loss": 0.215, "step": 1502 }, { "epoch": 0.46928420888299116, "grad_norm": 0.6241357922554016, "learning_rate": 5e-05, "loss": 0.2069, "step": 1503 }, { "epoch": 0.4695964405588947, "grad_norm": 0.626376211643219, "learning_rate": 5e-05, "loss": 0.2225, "step": 1504 }, { "epoch": 0.4699086722347982, "grad_norm": 0.6325458884239197, "learning_rate": 5e-05, "loss": 0.2259, "step": 1505 }, { "epoch": 0.47022090391070176, "grad_norm": 0.5733172297477722, "learning_rate": 5e-05, "loss": 0.1858, "step": 1506 }, { "epoch": 0.47053313558660526, "grad_norm": 0.6395273208618164, "learning_rate": 5e-05, "loss": 0.2304, "step": 1507 }, { "epoch": 0.4708453672625088, "grad_norm": 0.6263816356658936, "learning_rate": 5e-05, "loss": 0.2254, "step": 1508 }, { "epoch": 0.4711575989384123, "grad_norm": 0.6344569325447083, "learning_rate": 5e-05, "loss": 0.2228, "step": 1509 }, { "epoch": 0.4714698306143158, "grad_norm": 0.6164068579673767, "learning_rate": 5e-05, "loss": 0.215, "step": 1510 }, { "epoch": 0.47178206229021935, "grad_norm": 0.6321871280670166, "learning_rate": 5e-05, "loss": 0.2288, "step": 1511 }, { "epoch": 0.47209429396612285, "grad_norm": 0.6346167325973511, "learning_rate": 5e-05, "loss": 0.2212, "step": 1512 }, { "epoch": 0.4724065256420264, "grad_norm": 0.6037029027938843, "learning_rate": 5e-05, "loss": 0.2031, "step": 1513 }, { "epoch": 0.4727187573179299, "grad_norm": 0.606411337852478, "learning_rate": 5e-05, "loss": 0.201, "step": 1514 }, { "epoch": 0.47303098899383345, "grad_norm": 0.6674056053161621, "learning_rate": 5e-05, "loss": 0.2133, "step": 1515 }, { "epoch": 0.47334322066973694, "grad_norm": 0.6303066611289978, "learning_rate": 5e-05, "loss": 0.2258, "step": 1516 }, { "epoch": 0.47365545234564044, "grad_norm": 0.6161609888076782, "learning_rate": 5e-05, "loss": 0.2253, "step": 1517 }, { "epoch": 0.473967684021544, "grad_norm": 0.6468196511268616, "learning_rate": 5e-05, "loss": 0.2147, "step": 1518 }, { "epoch": 0.4742799156974475, "grad_norm": 0.6318031549453735, "learning_rate": 5e-05, "loss": 0.2347, "step": 1519 }, { "epoch": 0.47459214737335104, "grad_norm": 0.6364319920539856, "learning_rate": 5e-05, "loss": 0.2133, "step": 1520 }, { "epoch": 0.47490437904925453, "grad_norm": 0.6075962781906128, "learning_rate": 5e-05, "loss": 0.2146, "step": 1521 }, { "epoch": 0.4752166107251581, "grad_norm": 0.6407580375671387, "learning_rate": 5e-05, "loss": 0.2276, "step": 1522 }, { "epoch": 0.4755288424010616, "grad_norm": 0.6434010863304138, "learning_rate": 5e-05, "loss": 0.2442, "step": 1523 }, { "epoch": 0.47584107407696513, "grad_norm": 0.643236517906189, "learning_rate": 5e-05, "loss": 0.2186, "step": 1524 }, { "epoch": 0.4761533057528686, "grad_norm": 0.6373937726020813, "learning_rate": 5e-05, "loss": 0.2209, "step": 1525 }, { "epoch": 0.4764655374287721, "grad_norm": 0.6524330973625183, "learning_rate": 5e-05, "loss": 0.2063, "step": 1526 }, { "epoch": 0.4767777691046757, "grad_norm": 0.6328054070472717, "learning_rate": 5e-05, "loss": 0.2179, "step": 1527 }, { "epoch": 0.47709000078057917, "grad_norm": 0.6307472586631775, "learning_rate": 5e-05, "loss": 0.2262, "step": 1528 }, { "epoch": 0.4774022324564827, "grad_norm": 0.6422880291938782, "learning_rate": 5e-05, "loss": 0.2223, "step": 1529 }, { "epoch": 0.4777144641323862, "grad_norm": 0.6011480689048767, "learning_rate": 5e-05, "loss": 0.2146, "step": 1530 }, { "epoch": 0.47802669580828977, "grad_norm": 0.6392702460289001, "learning_rate": 5e-05, "loss": 0.2207, "step": 1531 }, { "epoch": 0.47833892748419327, "grad_norm": 0.6413083672523499, "learning_rate": 5e-05, "loss": 0.2372, "step": 1532 }, { "epoch": 0.4786511591600968, "grad_norm": 0.6120521426200867, "learning_rate": 5e-05, "loss": 0.2192, "step": 1533 }, { "epoch": 0.4789633908360003, "grad_norm": 0.6268153786659241, "learning_rate": 5e-05, "loss": 0.2329, "step": 1534 }, { "epoch": 0.4792756225119038, "grad_norm": 0.6473307609558105, "learning_rate": 5e-05, "loss": 0.2393, "step": 1535 }, { "epoch": 0.47958785418780736, "grad_norm": 0.5828239917755127, "learning_rate": 5e-05, "loss": 0.2017, "step": 1536 }, { "epoch": 0.47990008586371086, "grad_norm": 0.6074156165122986, "learning_rate": 5e-05, "loss": 0.229, "step": 1537 }, { "epoch": 0.4802123175396144, "grad_norm": 0.6754520535469055, "learning_rate": 5e-05, "loss": 0.2364, "step": 1538 }, { "epoch": 0.4805245492155179, "grad_norm": 0.6401694416999817, "learning_rate": 5e-05, "loss": 0.2313, "step": 1539 }, { "epoch": 0.48083678089142146, "grad_norm": 0.6076492071151733, "learning_rate": 5e-05, "loss": 0.229, "step": 1540 }, { "epoch": 0.48114901256732495, "grad_norm": 0.61426842212677, "learning_rate": 5e-05, "loss": 0.2154, "step": 1541 }, { "epoch": 0.48146124424322845, "grad_norm": 0.6309921741485596, "learning_rate": 5e-05, "loss": 0.2321, "step": 1542 }, { "epoch": 0.481773475919132, "grad_norm": 0.6337516903877258, "learning_rate": 5e-05, "loss": 0.2167, "step": 1543 }, { "epoch": 0.4820857075950355, "grad_norm": 0.6132643818855286, "learning_rate": 5e-05, "loss": 0.2244, "step": 1544 }, { "epoch": 0.48239793927093905, "grad_norm": 0.6394321322441101, "learning_rate": 5e-05, "loss": 0.2122, "step": 1545 }, { "epoch": 0.48271017094684254, "grad_norm": 0.6267091035842896, "learning_rate": 5e-05, "loss": 0.2195, "step": 1546 }, { "epoch": 0.4830224026227461, "grad_norm": 0.620923638343811, "learning_rate": 5e-05, "loss": 0.2115, "step": 1547 }, { "epoch": 0.4833346342986496, "grad_norm": 0.6342196464538574, "learning_rate": 5e-05, "loss": 0.2064, "step": 1548 }, { "epoch": 0.48364686597455314, "grad_norm": 0.6301013231277466, "learning_rate": 5e-05, "loss": 0.2189, "step": 1549 }, { "epoch": 0.48395909765045664, "grad_norm": 0.6449418067932129, "learning_rate": 5e-05, "loss": 0.2228, "step": 1550 }, { "epoch": 0.48427132932636013, "grad_norm": 0.5835330486297607, "learning_rate": 5e-05, "loss": 0.2016, "step": 1551 }, { "epoch": 0.4845835610022637, "grad_norm": 0.5976396203041077, "learning_rate": 5e-05, "loss": 0.2184, "step": 1552 }, { "epoch": 0.4848957926781672, "grad_norm": 0.6863794922828674, "learning_rate": 5e-05, "loss": 0.2379, "step": 1553 }, { "epoch": 0.48520802435407073, "grad_norm": 0.6140331625938416, "learning_rate": 5e-05, "loss": 0.2232, "step": 1554 }, { "epoch": 0.48552025602997423, "grad_norm": 0.5956310629844666, "learning_rate": 5e-05, "loss": 0.2181, "step": 1555 }, { "epoch": 0.4858324877058778, "grad_norm": 0.6260014772415161, "learning_rate": 5e-05, "loss": 0.2131, "step": 1556 }, { "epoch": 0.4861447193817813, "grad_norm": 0.634411096572876, "learning_rate": 5e-05, "loss": 0.2269, "step": 1557 }, { "epoch": 0.4864569510576848, "grad_norm": 0.6862738728523254, "learning_rate": 5e-05, "loss": 0.2413, "step": 1558 }, { "epoch": 0.4867691827335883, "grad_norm": 0.6238920092582703, "learning_rate": 5e-05, "loss": 0.2039, "step": 1559 }, { "epoch": 0.4870814144094918, "grad_norm": 0.6405962705612183, "learning_rate": 5e-05, "loss": 0.2079, "step": 1560 }, { "epoch": 0.48739364608539537, "grad_norm": 0.6507402658462524, "learning_rate": 5e-05, "loss": 0.2103, "step": 1561 }, { "epoch": 0.48770587776129887, "grad_norm": 0.6429463028907776, "learning_rate": 5e-05, "loss": 0.2218, "step": 1562 }, { "epoch": 0.4880181094372024, "grad_norm": 0.643785297870636, "learning_rate": 5e-05, "loss": 0.2369, "step": 1563 }, { "epoch": 0.4883303411131059, "grad_norm": 0.6139934062957764, "learning_rate": 5e-05, "loss": 0.2096, "step": 1564 }, { "epoch": 0.48864257278900947, "grad_norm": 0.6298250555992126, "learning_rate": 5e-05, "loss": 0.2158, "step": 1565 }, { "epoch": 0.48895480446491296, "grad_norm": 0.6180264353752136, "learning_rate": 5e-05, "loss": 0.2226, "step": 1566 }, { "epoch": 0.48926703614081646, "grad_norm": 0.6762915253639221, "learning_rate": 5e-05, "loss": 0.2204, "step": 1567 }, { "epoch": 0.48957926781672, "grad_norm": 0.6387673616409302, "learning_rate": 5e-05, "loss": 0.2311, "step": 1568 }, { "epoch": 0.4898914994926235, "grad_norm": 0.6718734502792358, "learning_rate": 5e-05, "loss": 0.2425, "step": 1569 }, { "epoch": 0.49020373116852706, "grad_norm": 0.6408171057701111, "learning_rate": 5e-05, "loss": 0.2075, "step": 1570 }, { "epoch": 0.49051596284443055, "grad_norm": 0.6428501605987549, "learning_rate": 5e-05, "loss": 0.2168, "step": 1571 }, { "epoch": 0.4908281945203341, "grad_norm": 0.6204647421836853, "learning_rate": 5e-05, "loss": 0.2155, "step": 1572 }, { "epoch": 0.4911404261962376, "grad_norm": 0.627505898475647, "learning_rate": 5e-05, "loss": 0.2253, "step": 1573 }, { "epoch": 0.49145265787214115, "grad_norm": 0.6501761674880981, "learning_rate": 5e-05, "loss": 0.222, "step": 1574 }, { "epoch": 0.49176488954804465, "grad_norm": 0.6072726249694824, "learning_rate": 5e-05, "loss": 0.2128, "step": 1575 }, { "epoch": 0.49207712122394814, "grad_norm": 0.6337903738021851, "learning_rate": 5e-05, "loss": 0.2385, "step": 1576 }, { "epoch": 0.4923893528998517, "grad_norm": 0.6421053409576416, "learning_rate": 5e-05, "loss": 0.1986, "step": 1577 }, { "epoch": 0.4927015845757552, "grad_norm": 0.6227735877037048, "learning_rate": 5e-05, "loss": 0.2238, "step": 1578 }, { "epoch": 0.49301381625165874, "grad_norm": 0.6229028105735779, "learning_rate": 5e-05, "loss": 0.2213, "step": 1579 }, { "epoch": 0.49332604792756224, "grad_norm": 0.608867347240448, "learning_rate": 5e-05, "loss": 0.2104, "step": 1580 }, { "epoch": 0.4936382796034658, "grad_norm": 0.6948683261871338, "learning_rate": 5e-05, "loss": 0.2235, "step": 1581 }, { "epoch": 0.4939505112793693, "grad_norm": 0.6442844867706299, "learning_rate": 5e-05, "loss": 0.2219, "step": 1582 }, { "epoch": 0.49426274295527284, "grad_norm": 0.6302390098571777, "learning_rate": 5e-05, "loss": 0.236, "step": 1583 }, { "epoch": 0.49457497463117633, "grad_norm": 0.6188952922821045, "learning_rate": 5e-05, "loss": 0.2204, "step": 1584 }, { "epoch": 0.49488720630707983, "grad_norm": 0.6226432919502258, "learning_rate": 5e-05, "loss": 0.2066, "step": 1585 }, { "epoch": 0.4951994379829834, "grad_norm": 0.6922722458839417, "learning_rate": 5e-05, "loss": 0.2336, "step": 1586 }, { "epoch": 0.4955116696588869, "grad_norm": 0.6164588928222656, "learning_rate": 5e-05, "loss": 0.2107, "step": 1587 }, { "epoch": 0.49582390133479043, "grad_norm": 0.634940505027771, "learning_rate": 5e-05, "loss": 0.2137, "step": 1588 }, { "epoch": 0.4961361330106939, "grad_norm": 0.6440693736076355, "learning_rate": 5e-05, "loss": 0.2416, "step": 1589 }, { "epoch": 0.4964483646865975, "grad_norm": 0.6278868913650513, "learning_rate": 5e-05, "loss": 0.2319, "step": 1590 }, { "epoch": 0.496760596362501, "grad_norm": 0.5989719033241272, "learning_rate": 5e-05, "loss": 0.2193, "step": 1591 }, { "epoch": 0.49707282803840447, "grad_norm": 0.6500462889671326, "learning_rate": 5e-05, "loss": 0.217, "step": 1592 }, { "epoch": 0.497385059714308, "grad_norm": 0.6524824500083923, "learning_rate": 5e-05, "loss": 0.2266, "step": 1593 }, { "epoch": 0.4976972913902115, "grad_norm": 0.5911077260971069, "learning_rate": 5e-05, "loss": 0.2146, "step": 1594 }, { "epoch": 0.49800952306611507, "grad_norm": 0.6299285292625427, "learning_rate": 5e-05, "loss": 0.2279, "step": 1595 }, { "epoch": 0.49832175474201856, "grad_norm": 0.6660704612731934, "learning_rate": 5e-05, "loss": 0.234, "step": 1596 }, { "epoch": 0.4986339864179221, "grad_norm": 0.6358308792114258, "learning_rate": 5e-05, "loss": 0.2018, "step": 1597 }, { "epoch": 0.4989462180938256, "grad_norm": 0.6701061129570007, "learning_rate": 5e-05, "loss": 0.2295, "step": 1598 }, { "epoch": 0.49925844976972916, "grad_norm": 0.6149998903274536, "learning_rate": 5e-05, "loss": 0.2125, "step": 1599 }, { "epoch": 0.49957068144563266, "grad_norm": 0.6421350240707397, "learning_rate": 5e-05, "loss": 0.2307, "step": 1600 }, { "epoch": 0.49988291312153615, "grad_norm": 0.6014291048049927, "learning_rate": 5e-05, "loss": 0.2178, "step": 1601 }, { "epoch": 0.5001951447974397, "grad_norm": 0.6371846795082092, "learning_rate": 5e-05, "loss": 0.2234, "step": 1602 }, { "epoch": 0.5005073764733432, "grad_norm": 0.6595979928970337, "learning_rate": 5e-05, "loss": 0.2387, "step": 1603 }, { "epoch": 0.5008196081492468, "grad_norm": 0.6213628649711609, "learning_rate": 5e-05, "loss": 0.2306, "step": 1604 }, { "epoch": 0.5011318398251503, "grad_norm": 0.61635422706604, "learning_rate": 5e-05, "loss": 0.2096, "step": 1605 }, { "epoch": 0.5014440715010537, "grad_norm": 0.6467339992523193, "learning_rate": 5e-05, "loss": 0.2302, "step": 1606 }, { "epoch": 0.5017563031769573, "grad_norm": 0.6624482870101929, "learning_rate": 5e-05, "loss": 0.2352, "step": 1607 }, { "epoch": 0.5020685348528608, "grad_norm": 0.5934258103370667, "learning_rate": 5e-05, "loss": 0.1979, "step": 1608 }, { "epoch": 0.5023807665287643, "grad_norm": 0.6085441708564758, "learning_rate": 5e-05, "loss": 0.2119, "step": 1609 }, { "epoch": 0.5026929982046678, "grad_norm": 0.6158584952354431, "learning_rate": 5e-05, "loss": 0.2182, "step": 1610 }, { "epoch": 0.5030052298805714, "grad_norm": 0.6521998047828674, "learning_rate": 5e-05, "loss": 0.2252, "step": 1611 }, { "epoch": 0.5033174615564749, "grad_norm": 0.6268987059593201, "learning_rate": 5e-05, "loss": 0.2199, "step": 1612 }, { "epoch": 0.5036296932323784, "grad_norm": 0.6651473045349121, "learning_rate": 5e-05, "loss": 0.2451, "step": 1613 }, { "epoch": 0.5039419249082819, "grad_norm": 0.5846369862556458, "learning_rate": 5e-05, "loss": 0.2027, "step": 1614 }, { "epoch": 0.5042541565841855, "grad_norm": 0.5978331565856934, "learning_rate": 5e-05, "loss": 0.2068, "step": 1615 }, { "epoch": 0.504566388260089, "grad_norm": 0.6514236927032471, "learning_rate": 5e-05, "loss": 0.232, "step": 1616 }, { "epoch": 0.5048786199359925, "grad_norm": 0.5866838097572327, "learning_rate": 5e-05, "loss": 0.2029, "step": 1617 }, { "epoch": 0.505190851611896, "grad_norm": 0.6313523054122925, "learning_rate": 5e-05, "loss": 0.2096, "step": 1618 }, { "epoch": 0.5055030832877996, "grad_norm": 0.6128988862037659, "learning_rate": 5e-05, "loss": 0.2105, "step": 1619 }, { "epoch": 0.505815314963703, "grad_norm": 0.6268384456634521, "learning_rate": 5e-05, "loss": 0.211, "step": 1620 }, { "epoch": 0.5061275466396066, "grad_norm": 0.6057892441749573, "learning_rate": 5e-05, "loss": 0.2164, "step": 1621 }, { "epoch": 0.5064397783155101, "grad_norm": 0.6410619020462036, "learning_rate": 5e-05, "loss": 0.2117, "step": 1622 }, { "epoch": 0.5067520099914137, "grad_norm": 0.6434296369552612, "learning_rate": 5e-05, "loss": 0.2219, "step": 1623 }, { "epoch": 0.5070642416673171, "grad_norm": 0.6648174524307251, "learning_rate": 5e-05, "loss": 0.226, "step": 1624 }, { "epoch": 0.5073764733432207, "grad_norm": 0.6344332098960876, "learning_rate": 5e-05, "loss": 0.2166, "step": 1625 }, { "epoch": 0.5076887050191242, "grad_norm": 0.6620396971702576, "learning_rate": 5e-05, "loss": 0.2343, "step": 1626 }, { "epoch": 0.5080009366950277, "grad_norm": 0.6034176349639893, "learning_rate": 5e-05, "loss": 0.2046, "step": 1627 }, { "epoch": 0.5083131683709312, "grad_norm": 0.6289302706718445, "learning_rate": 5e-05, "loss": 0.2056, "step": 1628 }, { "epoch": 0.5086254000468348, "grad_norm": 0.642011284828186, "learning_rate": 5e-05, "loss": 0.2286, "step": 1629 }, { "epoch": 0.5089376317227383, "grad_norm": 0.6450668573379517, "learning_rate": 5e-05, "loss": 0.229, "step": 1630 }, { "epoch": 0.5092498633986418, "grad_norm": 0.6210110783576965, "learning_rate": 5e-05, "loss": 0.2123, "step": 1631 }, { "epoch": 0.5095620950745453, "grad_norm": 0.6523484587669373, "learning_rate": 5e-05, "loss": 0.2301, "step": 1632 }, { "epoch": 0.5098743267504489, "grad_norm": 0.6080948710441589, "learning_rate": 5e-05, "loss": 0.2247, "step": 1633 }, { "epoch": 0.5101865584263523, "grad_norm": 0.6413857340812683, "learning_rate": 5e-05, "loss": 0.2316, "step": 1634 }, { "epoch": 0.5104987901022559, "grad_norm": 0.6077040433883667, "learning_rate": 5e-05, "loss": 0.2127, "step": 1635 }, { "epoch": 0.5108110217781594, "grad_norm": 0.6260326504707336, "learning_rate": 5e-05, "loss": 0.2148, "step": 1636 }, { "epoch": 0.511123253454063, "grad_norm": 0.624790370464325, "learning_rate": 5e-05, "loss": 0.2314, "step": 1637 }, { "epoch": 0.5114354851299664, "grad_norm": 0.6217085123062134, "learning_rate": 5e-05, "loss": 0.2192, "step": 1638 }, { "epoch": 0.51174771680587, "grad_norm": 0.6046383380889893, "learning_rate": 5e-05, "loss": 0.2251, "step": 1639 }, { "epoch": 0.5120599484817735, "grad_norm": 0.6215452551841736, "learning_rate": 5e-05, "loss": 0.2126, "step": 1640 }, { "epoch": 0.512372180157677, "grad_norm": 0.6978608965873718, "learning_rate": 5e-05, "loss": 0.248, "step": 1641 }, { "epoch": 0.5126844118335805, "grad_norm": 0.6041996479034424, "learning_rate": 5e-05, "loss": 0.203, "step": 1642 }, { "epoch": 0.512996643509484, "grad_norm": 0.5831112861633301, "learning_rate": 5e-05, "loss": 0.1998, "step": 1643 }, { "epoch": 0.5133088751853876, "grad_norm": 0.5974743962287903, "learning_rate": 5e-05, "loss": 0.2104, "step": 1644 }, { "epoch": 0.513621106861291, "grad_norm": 0.633897602558136, "learning_rate": 5e-05, "loss": 0.2222, "step": 1645 }, { "epoch": 0.5139333385371946, "grad_norm": 0.6923972368240356, "learning_rate": 5e-05, "loss": 0.2183, "step": 1646 }, { "epoch": 0.5142455702130981, "grad_norm": 0.6234503388404846, "learning_rate": 5e-05, "loss": 0.2236, "step": 1647 }, { "epoch": 0.5145578018890017, "grad_norm": 0.6065315008163452, "learning_rate": 5e-05, "loss": 0.2159, "step": 1648 }, { "epoch": 0.5148700335649051, "grad_norm": 0.6263312697410583, "learning_rate": 5e-05, "loss": 0.2198, "step": 1649 }, { "epoch": 0.5151822652408087, "grad_norm": 0.6250565052032471, "learning_rate": 5e-05, "loss": 0.2174, "step": 1650 }, { "epoch": 0.5154944969167122, "grad_norm": 0.6658428311347961, "learning_rate": 5e-05, "loss": 0.2328, "step": 1651 }, { "epoch": 0.5158067285926157, "grad_norm": 0.6455751061439514, "learning_rate": 5e-05, "loss": 0.2335, "step": 1652 }, { "epoch": 0.5161189602685192, "grad_norm": 0.6621795296669006, "learning_rate": 5e-05, "loss": 0.2372, "step": 1653 }, { "epoch": 0.5164311919444228, "grad_norm": 0.6268715262413025, "learning_rate": 5e-05, "loss": 0.2111, "step": 1654 }, { "epoch": 0.5167434236203263, "grad_norm": 0.6520974040031433, "learning_rate": 5e-05, "loss": 0.2195, "step": 1655 }, { "epoch": 0.5170556552962298, "grad_norm": 0.6299859881401062, "learning_rate": 5e-05, "loss": 0.213, "step": 1656 }, { "epoch": 0.5173678869721333, "grad_norm": 0.6486907601356506, "learning_rate": 5e-05, "loss": 0.2261, "step": 1657 }, { "epoch": 0.5176801186480369, "grad_norm": 0.6720088720321655, "learning_rate": 5e-05, "loss": 0.221, "step": 1658 }, { "epoch": 0.5179923503239403, "grad_norm": 0.6529538631439209, "learning_rate": 5e-05, "loss": 0.2282, "step": 1659 }, { "epoch": 0.5183045819998439, "grad_norm": 0.6489236354827881, "learning_rate": 5e-05, "loss": 0.218, "step": 1660 }, { "epoch": 0.5186168136757474, "grad_norm": 0.6286658644676208, "learning_rate": 5e-05, "loss": 0.2245, "step": 1661 }, { "epoch": 0.518929045351651, "grad_norm": 0.6365597248077393, "learning_rate": 5e-05, "loss": 0.2167, "step": 1662 }, { "epoch": 0.5192412770275544, "grad_norm": 0.6282097697257996, "learning_rate": 5e-05, "loss": 0.2263, "step": 1663 }, { "epoch": 0.519553508703458, "grad_norm": 0.6427894234657288, "learning_rate": 5e-05, "loss": 0.2332, "step": 1664 }, { "epoch": 0.5198657403793615, "grad_norm": 0.6223099231719971, "learning_rate": 5e-05, "loss": 0.212, "step": 1665 }, { "epoch": 0.5201779720552651, "grad_norm": 0.6365476250648499, "learning_rate": 5e-05, "loss": 0.2273, "step": 1666 }, { "epoch": 0.5204902037311685, "grad_norm": 0.6608768105506897, "learning_rate": 5e-05, "loss": 0.2135, "step": 1667 }, { "epoch": 0.520802435407072, "grad_norm": 0.6197283864021301, "learning_rate": 5e-05, "loss": 0.2168, "step": 1668 }, { "epoch": 0.5211146670829756, "grad_norm": 0.6206272840499878, "learning_rate": 5e-05, "loss": 0.2252, "step": 1669 }, { "epoch": 0.521426898758879, "grad_norm": 0.6400339603424072, "learning_rate": 5e-05, "loss": 0.2308, "step": 1670 }, { "epoch": 0.5217391304347826, "grad_norm": 0.6106640100479126, "learning_rate": 5e-05, "loss": 0.2173, "step": 1671 }, { "epoch": 0.5220513621106861, "grad_norm": 0.5942342877388, "learning_rate": 5e-05, "loss": 0.2029, "step": 1672 }, { "epoch": 0.5223635937865897, "grad_norm": 0.654047966003418, "learning_rate": 5e-05, "loss": 0.2409, "step": 1673 }, { "epoch": 0.5226758254624931, "grad_norm": 0.6079325675964355, "learning_rate": 5e-05, "loss": 0.2011, "step": 1674 }, { "epoch": 0.5229880571383967, "grad_norm": 0.6447796821594238, "learning_rate": 5e-05, "loss": 0.2281, "step": 1675 }, { "epoch": 0.5233002888143002, "grad_norm": 0.6981320977210999, "learning_rate": 5e-05, "loss": 0.2359, "step": 1676 }, { "epoch": 0.5236125204902037, "grad_norm": 0.6252594590187073, "learning_rate": 5e-05, "loss": 0.2131, "step": 1677 }, { "epoch": 0.5239247521661072, "grad_norm": 0.6351770758628845, "learning_rate": 5e-05, "loss": 0.236, "step": 1678 }, { "epoch": 0.5242369838420108, "grad_norm": 0.5909566879272461, "learning_rate": 5e-05, "loss": 0.2099, "step": 1679 }, { "epoch": 0.5245492155179143, "grad_norm": 0.6542765498161316, "learning_rate": 5e-05, "loss": 0.2295, "step": 1680 }, { "epoch": 0.5248614471938178, "grad_norm": 0.6460165977478027, "learning_rate": 5e-05, "loss": 0.2181, "step": 1681 }, { "epoch": 0.5251736788697213, "grad_norm": 0.670966625213623, "learning_rate": 5e-05, "loss": 0.2319, "step": 1682 }, { "epoch": 0.5254859105456249, "grad_norm": 0.64467453956604, "learning_rate": 5e-05, "loss": 0.2177, "step": 1683 }, { "epoch": 0.5257981422215283, "grad_norm": 0.6746854186058044, "learning_rate": 5e-05, "loss": 0.2106, "step": 1684 }, { "epoch": 0.5261103738974319, "grad_norm": 0.6204254627227783, "learning_rate": 5e-05, "loss": 0.2243, "step": 1685 }, { "epoch": 0.5264226055733354, "grad_norm": 0.6044176816940308, "learning_rate": 5e-05, "loss": 0.2151, "step": 1686 }, { "epoch": 0.526734837249239, "grad_norm": 0.6593879461288452, "learning_rate": 5e-05, "loss": 0.2504, "step": 1687 }, { "epoch": 0.5270470689251424, "grad_norm": 0.5950485467910767, "learning_rate": 5e-05, "loss": 0.2058, "step": 1688 }, { "epoch": 0.527359300601046, "grad_norm": 0.6048310995101929, "learning_rate": 5e-05, "loss": 0.2167, "step": 1689 }, { "epoch": 0.5276715322769495, "grad_norm": 0.626575231552124, "learning_rate": 5e-05, "loss": 0.2019, "step": 1690 }, { "epoch": 0.5279837639528531, "grad_norm": 0.6753300428390503, "learning_rate": 5e-05, "loss": 0.2334, "step": 1691 }, { "epoch": 0.5282959956287565, "grad_norm": 0.6469703316688538, "learning_rate": 5e-05, "loss": 0.2106, "step": 1692 }, { "epoch": 0.5286082273046601, "grad_norm": 0.631550669670105, "learning_rate": 5e-05, "loss": 0.2238, "step": 1693 }, { "epoch": 0.5289204589805636, "grad_norm": 0.6114062666893005, "learning_rate": 5e-05, "loss": 0.2082, "step": 1694 }, { "epoch": 0.529232690656467, "grad_norm": 0.6303057074546814, "learning_rate": 5e-05, "loss": 0.2185, "step": 1695 }, { "epoch": 0.5295449223323706, "grad_norm": 0.6209258437156677, "learning_rate": 5e-05, "loss": 0.2051, "step": 1696 }, { "epoch": 0.5298571540082742, "grad_norm": 0.6224830150604248, "learning_rate": 5e-05, "loss": 0.2145, "step": 1697 }, { "epoch": 0.5301693856841777, "grad_norm": 0.6755900979042053, "learning_rate": 5e-05, "loss": 0.2342, "step": 1698 }, { "epoch": 0.5304816173600811, "grad_norm": 0.6288338303565979, "learning_rate": 5e-05, "loss": 0.2193, "step": 1699 }, { "epoch": 0.5307938490359847, "grad_norm": 0.6351395845413208, "learning_rate": 5e-05, "loss": 0.2143, "step": 1700 }, { "epoch": 0.5311060807118883, "grad_norm": 0.6208987832069397, "learning_rate": 5e-05, "loss": 0.2193, "step": 1701 }, { "epoch": 0.5314183123877917, "grad_norm": 0.6221000552177429, "learning_rate": 5e-05, "loss": 0.2002, "step": 1702 }, { "epoch": 0.5317305440636952, "grad_norm": 0.6617984175682068, "learning_rate": 5e-05, "loss": 0.2345, "step": 1703 }, { "epoch": 0.5320427757395988, "grad_norm": 0.6018268465995789, "learning_rate": 5e-05, "loss": 0.2051, "step": 1704 }, { "epoch": 0.5323550074155023, "grad_norm": 0.6208410263061523, "learning_rate": 5e-05, "loss": 0.2242, "step": 1705 }, { "epoch": 0.5326672390914058, "grad_norm": 0.6371368169784546, "learning_rate": 5e-05, "loss": 0.2285, "step": 1706 }, { "epoch": 0.5329794707673093, "grad_norm": 0.6302842497825623, "learning_rate": 5e-05, "loss": 0.2225, "step": 1707 }, { "epoch": 0.5332917024432129, "grad_norm": 0.6182356476783752, "learning_rate": 5e-05, "loss": 0.2105, "step": 1708 }, { "epoch": 0.5336039341191163, "grad_norm": 0.6452282667160034, "learning_rate": 5e-05, "loss": 0.242, "step": 1709 }, { "epoch": 0.5339161657950199, "grad_norm": 0.6414559483528137, "learning_rate": 5e-05, "loss": 0.2349, "step": 1710 }, { "epoch": 0.5342283974709234, "grad_norm": 0.6299080848693848, "learning_rate": 5e-05, "loss": 0.2166, "step": 1711 }, { "epoch": 0.534540629146827, "grad_norm": 0.6734235286712646, "learning_rate": 5e-05, "loss": 0.2383, "step": 1712 }, { "epoch": 0.5348528608227304, "grad_norm": 0.6101397275924683, "learning_rate": 5e-05, "loss": 0.2139, "step": 1713 }, { "epoch": 0.535165092498634, "grad_norm": 0.6549090147018433, "learning_rate": 5e-05, "loss": 0.2269, "step": 1714 }, { "epoch": 0.5354773241745375, "grad_norm": 0.6488587260246277, "learning_rate": 5e-05, "loss": 0.2216, "step": 1715 }, { "epoch": 0.5357895558504411, "grad_norm": 0.6503477096557617, "learning_rate": 5e-05, "loss": 0.2397, "step": 1716 }, { "epoch": 0.5361017875263445, "grad_norm": 0.6626065969467163, "learning_rate": 5e-05, "loss": 0.2269, "step": 1717 }, { "epoch": 0.5364140192022481, "grad_norm": 0.6484348177909851, "learning_rate": 5e-05, "loss": 0.2268, "step": 1718 }, { "epoch": 0.5367262508781516, "grad_norm": 0.6341847777366638, "learning_rate": 5e-05, "loss": 0.2248, "step": 1719 }, { "epoch": 0.5370384825540551, "grad_norm": 0.6310688257217407, "learning_rate": 5e-05, "loss": 0.2176, "step": 1720 }, { "epoch": 0.5373507142299586, "grad_norm": 0.6006829142570496, "learning_rate": 5e-05, "loss": 0.2102, "step": 1721 }, { "epoch": 0.5376629459058622, "grad_norm": 0.6130343079566956, "learning_rate": 5e-05, "loss": 0.2233, "step": 1722 }, { "epoch": 0.5379751775817657, "grad_norm": 0.6308581829071045, "learning_rate": 5e-05, "loss": 0.2167, "step": 1723 }, { "epoch": 0.5382874092576692, "grad_norm": 0.6221240162849426, "learning_rate": 5e-05, "loss": 0.217, "step": 1724 }, { "epoch": 0.5385996409335727, "grad_norm": 0.6090912222862244, "learning_rate": 5e-05, "loss": 0.2211, "step": 1725 }, { "epoch": 0.5389118726094763, "grad_norm": 0.6403423547744751, "learning_rate": 5e-05, "loss": 0.218, "step": 1726 }, { "epoch": 0.5392241042853797, "grad_norm": 0.5863705277442932, "learning_rate": 5e-05, "loss": 0.2231, "step": 1727 }, { "epoch": 0.5395363359612833, "grad_norm": 0.6694061160087585, "learning_rate": 5e-05, "loss": 0.23, "step": 1728 }, { "epoch": 0.5398485676371868, "grad_norm": 0.6618030667304993, "learning_rate": 5e-05, "loss": 0.2296, "step": 1729 }, { "epoch": 0.5401607993130904, "grad_norm": 0.6709596514701843, "learning_rate": 5e-05, "loss": 0.2325, "step": 1730 }, { "epoch": 0.5404730309889938, "grad_norm": 0.6088460087776184, "learning_rate": 5e-05, "loss": 0.2095, "step": 1731 }, { "epoch": 0.5407852626648973, "grad_norm": 0.6333540081977844, "learning_rate": 5e-05, "loss": 0.2238, "step": 1732 }, { "epoch": 0.5410974943408009, "grad_norm": 0.6156498193740845, "learning_rate": 5e-05, "loss": 0.2112, "step": 1733 }, { "epoch": 0.5414097260167043, "grad_norm": 0.6343149542808533, "learning_rate": 5e-05, "loss": 0.2336, "step": 1734 }, { "epoch": 0.5417219576926079, "grad_norm": 0.6432977318763733, "learning_rate": 5e-05, "loss": 0.2253, "step": 1735 }, { "epoch": 0.5420341893685114, "grad_norm": 0.6262071132659912, "learning_rate": 5e-05, "loss": 0.2227, "step": 1736 }, { "epoch": 0.542346421044415, "grad_norm": 0.6587005257606506, "learning_rate": 5e-05, "loss": 0.2175, "step": 1737 }, { "epoch": 0.5426586527203184, "grad_norm": 0.6449254751205444, "learning_rate": 5e-05, "loss": 0.2203, "step": 1738 }, { "epoch": 0.542970884396222, "grad_norm": 0.6341560482978821, "learning_rate": 5e-05, "loss": 0.2183, "step": 1739 }, { "epoch": 0.5432831160721255, "grad_norm": 0.6614809632301331, "learning_rate": 5e-05, "loss": 0.2207, "step": 1740 }, { "epoch": 0.5435953477480291, "grad_norm": 0.6329062581062317, "learning_rate": 5e-05, "loss": 0.1967, "step": 1741 }, { "epoch": 0.5439075794239325, "grad_norm": 0.6017585396766663, "learning_rate": 5e-05, "loss": 0.2039, "step": 1742 }, { "epoch": 0.5442198110998361, "grad_norm": 0.638486385345459, "learning_rate": 5e-05, "loss": 0.2212, "step": 1743 }, { "epoch": 0.5445320427757396, "grad_norm": 0.6475170254707336, "learning_rate": 5e-05, "loss": 0.2251, "step": 1744 }, { "epoch": 0.5448442744516431, "grad_norm": 0.6148253083229065, "learning_rate": 5e-05, "loss": 0.2241, "step": 1745 }, { "epoch": 0.5451565061275466, "grad_norm": 0.6497926115989685, "learning_rate": 5e-05, "loss": 0.2271, "step": 1746 }, { "epoch": 0.5454687378034502, "grad_norm": 0.691101610660553, "learning_rate": 5e-05, "loss": 0.251, "step": 1747 }, { "epoch": 0.5457809694793537, "grad_norm": 0.6030811071395874, "learning_rate": 5e-05, "loss": 0.2014, "step": 1748 }, { "epoch": 0.5460932011552572, "grad_norm": 0.6131238341331482, "learning_rate": 5e-05, "loss": 0.2187, "step": 1749 }, { "epoch": 0.5464054328311607, "grad_norm": 0.6200949549674988, "learning_rate": 5e-05, "loss": 0.2274, "step": 1750 }, { "epoch": 0.5467176645070643, "grad_norm": 0.6003824472427368, "learning_rate": 5e-05, "loss": 0.2083, "step": 1751 }, { "epoch": 0.5470298961829677, "grad_norm": 0.6238158345222473, "learning_rate": 5e-05, "loss": 0.2161, "step": 1752 }, { "epoch": 0.5473421278588713, "grad_norm": 0.6741564869880676, "learning_rate": 5e-05, "loss": 0.2427, "step": 1753 }, { "epoch": 0.5476543595347748, "grad_norm": 0.6699058413505554, "learning_rate": 5e-05, "loss": 0.2354, "step": 1754 }, { "epoch": 0.5479665912106784, "grad_norm": 0.6342421770095825, "learning_rate": 5e-05, "loss": 0.212, "step": 1755 }, { "epoch": 0.5482788228865818, "grad_norm": 0.6255199909210205, "learning_rate": 5e-05, "loss": 0.2182, "step": 1756 }, { "epoch": 0.5485910545624854, "grad_norm": 0.6660852432250977, "learning_rate": 5e-05, "loss": 0.2303, "step": 1757 }, { "epoch": 0.5489032862383889, "grad_norm": 0.6034395098686218, "learning_rate": 5e-05, "loss": 0.22, "step": 1758 }, { "epoch": 0.5492155179142924, "grad_norm": 0.6334333419799805, "learning_rate": 5e-05, "loss": 0.2235, "step": 1759 }, { "epoch": 0.5495277495901959, "grad_norm": 0.5983448624610901, "learning_rate": 5e-05, "loss": 0.1977, "step": 1760 }, { "epoch": 0.5498399812660995, "grad_norm": 0.6006038188934326, "learning_rate": 5e-05, "loss": 0.2103, "step": 1761 }, { "epoch": 0.550152212942003, "grad_norm": 0.6428787112236023, "learning_rate": 5e-05, "loss": 0.2329, "step": 1762 }, { "epoch": 0.5504644446179064, "grad_norm": 0.6605635285377502, "learning_rate": 5e-05, "loss": 0.2296, "step": 1763 }, { "epoch": 0.55077667629381, "grad_norm": 0.6108589172363281, "learning_rate": 5e-05, "loss": 0.2154, "step": 1764 }, { "epoch": 0.5510889079697135, "grad_norm": 0.5989452004432678, "learning_rate": 5e-05, "loss": 0.2152, "step": 1765 }, { "epoch": 0.5514011396456171, "grad_norm": 0.6302116513252258, "learning_rate": 5e-05, "loss": 0.2187, "step": 1766 }, { "epoch": 0.5517133713215205, "grad_norm": 0.6373298168182373, "learning_rate": 5e-05, "loss": 0.2254, "step": 1767 }, { "epoch": 0.5520256029974241, "grad_norm": 0.6258125901222229, "learning_rate": 5e-05, "loss": 0.2218, "step": 1768 }, { "epoch": 0.5523378346733276, "grad_norm": 0.6244069933891296, "learning_rate": 5e-05, "loss": 0.2189, "step": 1769 }, { "epoch": 0.5526500663492311, "grad_norm": 0.6070876121520996, "learning_rate": 5e-05, "loss": 0.2146, "step": 1770 }, { "epoch": 0.5529622980251346, "grad_norm": 0.5922994017601013, "learning_rate": 5e-05, "loss": 0.2036, "step": 1771 }, { "epoch": 0.5532745297010382, "grad_norm": 0.6688510775566101, "learning_rate": 5e-05, "loss": 0.2347, "step": 1772 }, { "epoch": 0.5535867613769417, "grad_norm": 0.6324248909950256, "learning_rate": 5e-05, "loss": 0.2065, "step": 1773 }, { "epoch": 0.5538989930528452, "grad_norm": 0.6157494783401489, "learning_rate": 5e-05, "loss": 0.2112, "step": 1774 }, { "epoch": 0.5542112247287487, "grad_norm": 0.6311381459236145, "learning_rate": 5e-05, "loss": 0.2154, "step": 1775 }, { "epoch": 0.5545234564046523, "grad_norm": 0.6027914881706238, "learning_rate": 5e-05, "loss": 0.2175, "step": 1776 }, { "epoch": 0.5548356880805557, "grad_norm": 0.6119417548179626, "learning_rate": 5e-05, "loss": 0.2196, "step": 1777 }, { "epoch": 0.5551479197564593, "grad_norm": 0.6020864844322205, "learning_rate": 5e-05, "loss": 0.2061, "step": 1778 }, { "epoch": 0.5554601514323628, "grad_norm": 0.65812748670578, "learning_rate": 5e-05, "loss": 0.2231, "step": 1779 }, { "epoch": 0.5557723831082664, "grad_norm": 0.6496811509132385, "learning_rate": 5e-05, "loss": 0.2215, "step": 1780 }, { "epoch": 0.5560846147841698, "grad_norm": 0.6062426567077637, "learning_rate": 5e-05, "loss": 0.2086, "step": 1781 }, { "epoch": 0.5563968464600734, "grad_norm": 0.6056748032569885, "learning_rate": 5e-05, "loss": 0.2147, "step": 1782 }, { "epoch": 0.5567090781359769, "grad_norm": 0.6322394013404846, "learning_rate": 5e-05, "loss": 0.2226, "step": 1783 }, { "epoch": 0.5570213098118804, "grad_norm": 0.635364830493927, "learning_rate": 5e-05, "loss": 0.2329, "step": 1784 }, { "epoch": 0.5573335414877839, "grad_norm": 0.6714361906051636, "learning_rate": 5e-05, "loss": 0.2285, "step": 1785 }, { "epoch": 0.5576457731636875, "grad_norm": 0.61544269323349, "learning_rate": 5e-05, "loss": 0.2116, "step": 1786 }, { "epoch": 0.557958004839591, "grad_norm": 0.5999631285667419, "learning_rate": 5e-05, "loss": 0.2105, "step": 1787 }, { "epoch": 0.5582702365154945, "grad_norm": 0.5926848649978638, "learning_rate": 5e-05, "loss": 0.2005, "step": 1788 }, { "epoch": 0.558582468191398, "grad_norm": 0.6249416470527649, "learning_rate": 5e-05, "loss": 0.2377, "step": 1789 }, { "epoch": 0.5588946998673016, "grad_norm": 0.6490787863731384, "learning_rate": 5e-05, "loss": 0.2441, "step": 1790 }, { "epoch": 0.5592069315432051, "grad_norm": 0.6589080095291138, "learning_rate": 5e-05, "loss": 0.2341, "step": 1791 }, { "epoch": 0.5595191632191086, "grad_norm": 0.6002094745635986, "learning_rate": 5e-05, "loss": 0.2204, "step": 1792 }, { "epoch": 0.5598313948950121, "grad_norm": 0.6949983835220337, "learning_rate": 5e-05, "loss": 0.2272, "step": 1793 }, { "epoch": 0.5601436265709157, "grad_norm": 0.6303181052207947, "learning_rate": 5e-05, "loss": 0.226, "step": 1794 }, { "epoch": 0.5604558582468191, "grad_norm": 0.625268816947937, "learning_rate": 5e-05, "loss": 0.2308, "step": 1795 }, { "epoch": 0.5607680899227226, "grad_norm": 0.6240856051445007, "learning_rate": 5e-05, "loss": 0.219, "step": 1796 }, { "epoch": 0.5610803215986262, "grad_norm": 0.660301923751831, "learning_rate": 5e-05, "loss": 0.239, "step": 1797 }, { "epoch": 0.5613925532745297, "grad_norm": 0.6131911873817444, "learning_rate": 5e-05, "loss": 0.2312, "step": 1798 }, { "epoch": 0.5617047849504332, "grad_norm": 0.634680986404419, "learning_rate": 5e-05, "loss": 0.2085, "step": 1799 }, { "epoch": 0.5620170166263367, "grad_norm": 0.5905648469924927, "learning_rate": 5e-05, "loss": 0.2022, "step": 1800 }, { "epoch": 0.5623292483022403, "grad_norm": 0.6277625560760498, "learning_rate": 5e-05, "loss": 0.2288, "step": 1801 }, { "epoch": 0.5626414799781437, "grad_norm": 0.662610650062561, "learning_rate": 5e-05, "loss": 0.2378, "step": 1802 }, { "epoch": 0.5629537116540473, "grad_norm": 0.5907447338104248, "learning_rate": 5e-05, "loss": 0.1978, "step": 1803 }, { "epoch": 0.5632659433299508, "grad_norm": 0.620386004447937, "learning_rate": 5e-05, "loss": 0.2221, "step": 1804 }, { "epoch": 0.5635781750058544, "grad_norm": 0.6248092651367188, "learning_rate": 5e-05, "loss": 0.2155, "step": 1805 }, { "epoch": 0.5638904066817578, "grad_norm": 0.6114423871040344, "learning_rate": 5e-05, "loss": 0.2248, "step": 1806 }, { "epoch": 0.5642026383576614, "grad_norm": 0.69481360912323, "learning_rate": 5e-05, "loss": 0.2178, "step": 1807 }, { "epoch": 0.5645148700335649, "grad_norm": 0.5781159996986389, "learning_rate": 5e-05, "loss": 0.2075, "step": 1808 }, { "epoch": 0.5648271017094684, "grad_norm": 0.6185794472694397, "learning_rate": 5e-05, "loss": 0.2307, "step": 1809 }, { "epoch": 0.5651393333853719, "grad_norm": 0.6338289380073547, "learning_rate": 5e-05, "loss": 0.2201, "step": 1810 }, { "epoch": 0.5654515650612755, "grad_norm": 0.6644631028175354, "learning_rate": 5e-05, "loss": 0.2305, "step": 1811 }, { "epoch": 0.565763796737179, "grad_norm": 0.6105445623397827, "learning_rate": 5e-05, "loss": 0.2119, "step": 1812 }, { "epoch": 0.5660760284130825, "grad_norm": 0.6391845345497131, "learning_rate": 5e-05, "loss": 0.2311, "step": 1813 }, { "epoch": 0.566388260088986, "grad_norm": 0.6082035303115845, "learning_rate": 5e-05, "loss": 0.2089, "step": 1814 }, { "epoch": 0.5667004917648896, "grad_norm": 0.6357818245887756, "learning_rate": 5e-05, "loss": 0.2284, "step": 1815 }, { "epoch": 0.5670127234407931, "grad_norm": 0.5982517004013062, "learning_rate": 5e-05, "loss": 0.2249, "step": 1816 }, { "epoch": 0.5673249551166966, "grad_norm": 0.6034576892852783, "learning_rate": 5e-05, "loss": 0.1952, "step": 1817 }, { "epoch": 0.5676371867926001, "grad_norm": 0.6265826225280762, "learning_rate": 5e-05, "loss": 0.2227, "step": 1818 }, { "epoch": 0.5679494184685037, "grad_norm": 0.5742529034614563, "learning_rate": 5e-05, "loss": 0.2051, "step": 1819 }, { "epoch": 0.5682616501444071, "grad_norm": 0.7061665058135986, "learning_rate": 5e-05, "loss": 0.2242, "step": 1820 }, { "epoch": 0.5685738818203107, "grad_norm": 0.6116936206817627, "learning_rate": 5e-05, "loss": 0.2161, "step": 1821 }, { "epoch": 0.5688861134962142, "grad_norm": 0.6044204235076904, "learning_rate": 5e-05, "loss": 0.2186, "step": 1822 }, { "epoch": 0.5691983451721178, "grad_norm": 0.6212533712387085, "learning_rate": 5e-05, "loss": 0.2391, "step": 1823 }, { "epoch": 0.5695105768480212, "grad_norm": 0.6227824687957764, "learning_rate": 5e-05, "loss": 0.2144, "step": 1824 }, { "epoch": 0.5698228085239248, "grad_norm": 0.6433607935905457, "learning_rate": 5e-05, "loss": 0.2328, "step": 1825 }, { "epoch": 0.5701350401998283, "grad_norm": 0.5926203727722168, "learning_rate": 5e-05, "loss": 0.2136, "step": 1826 }, { "epoch": 0.5704472718757317, "grad_norm": 0.6058375835418701, "learning_rate": 5e-05, "loss": 0.2164, "step": 1827 }, { "epoch": 0.5707595035516353, "grad_norm": 0.5860849618911743, "learning_rate": 5e-05, "loss": 0.2117, "step": 1828 }, { "epoch": 0.5710717352275388, "grad_norm": 0.618284285068512, "learning_rate": 5e-05, "loss": 0.2086, "step": 1829 }, { "epoch": 0.5713839669034424, "grad_norm": 0.6448509693145752, "learning_rate": 5e-05, "loss": 0.2399, "step": 1830 }, { "epoch": 0.5716961985793458, "grad_norm": 0.6168004274368286, "learning_rate": 5e-05, "loss": 0.22, "step": 1831 }, { "epoch": 0.5720084302552494, "grad_norm": 0.6367641091346741, "learning_rate": 5e-05, "loss": 0.2332, "step": 1832 }, { "epoch": 0.5723206619311529, "grad_norm": 0.6193959712982178, "learning_rate": 5e-05, "loss": 0.2155, "step": 1833 }, { "epoch": 0.5726328936070564, "grad_norm": 0.5882675647735596, "learning_rate": 5e-05, "loss": 0.2057, "step": 1834 }, { "epoch": 0.5729451252829599, "grad_norm": 0.5782700181007385, "learning_rate": 5e-05, "loss": 0.2041, "step": 1835 }, { "epoch": 0.5732573569588635, "grad_norm": 0.6277264952659607, "learning_rate": 5e-05, "loss": 0.2168, "step": 1836 }, { "epoch": 0.573569588634767, "grad_norm": 0.6480419635772705, "learning_rate": 5e-05, "loss": 0.2436, "step": 1837 }, { "epoch": 0.5738818203106705, "grad_norm": 0.5994533896446228, "learning_rate": 5e-05, "loss": 0.2074, "step": 1838 }, { "epoch": 0.574194051986574, "grad_norm": 0.6428390145301819, "learning_rate": 5e-05, "loss": 0.2154, "step": 1839 }, { "epoch": 0.5745062836624776, "grad_norm": 0.6163097620010376, "learning_rate": 5e-05, "loss": 0.2214, "step": 1840 }, { "epoch": 0.5748185153383811, "grad_norm": 0.621480405330658, "learning_rate": 5e-05, "loss": 0.2304, "step": 1841 }, { "epoch": 0.5751307470142846, "grad_norm": 0.5688770413398743, "learning_rate": 5e-05, "loss": 0.1902, "step": 1842 }, { "epoch": 0.5754429786901881, "grad_norm": 0.6246997117996216, "learning_rate": 5e-05, "loss": 0.238, "step": 1843 }, { "epoch": 0.5757552103660917, "grad_norm": 0.5900658965110779, "learning_rate": 5e-05, "loss": 0.2006, "step": 1844 }, { "epoch": 0.5760674420419951, "grad_norm": 0.612694501876831, "learning_rate": 5e-05, "loss": 0.2156, "step": 1845 }, { "epoch": 0.5763796737178987, "grad_norm": 0.660085141658783, "learning_rate": 5e-05, "loss": 0.2353, "step": 1846 }, { "epoch": 0.5766919053938022, "grad_norm": 0.6057989597320557, "learning_rate": 5e-05, "loss": 0.2146, "step": 1847 }, { "epoch": 0.5770041370697058, "grad_norm": 0.6362593173980713, "learning_rate": 5e-05, "loss": 0.2249, "step": 1848 }, { "epoch": 0.5773163687456092, "grad_norm": 0.6152637004852295, "learning_rate": 5e-05, "loss": 0.2275, "step": 1849 }, { "epoch": 0.5776286004215128, "grad_norm": 0.640375018119812, "learning_rate": 5e-05, "loss": 0.2342, "step": 1850 }, { "epoch": 0.5779408320974163, "grad_norm": 0.6410207748413086, "learning_rate": 5e-05, "loss": 0.2256, "step": 1851 }, { "epoch": 0.5782530637733198, "grad_norm": 0.6392170190811157, "learning_rate": 5e-05, "loss": 0.2339, "step": 1852 }, { "epoch": 0.5785652954492233, "grad_norm": 0.62956702709198, "learning_rate": 5e-05, "loss": 0.22, "step": 1853 }, { "epoch": 0.5788775271251269, "grad_norm": 0.6529841423034668, "learning_rate": 5e-05, "loss": 0.2226, "step": 1854 }, { "epoch": 0.5791897588010304, "grad_norm": 0.61708003282547, "learning_rate": 5e-05, "loss": 0.2272, "step": 1855 }, { "epoch": 0.5795019904769338, "grad_norm": 0.6264530420303345, "learning_rate": 5e-05, "loss": 0.2321, "step": 1856 }, { "epoch": 0.5798142221528374, "grad_norm": 0.6179224252700806, "learning_rate": 5e-05, "loss": 0.2187, "step": 1857 }, { "epoch": 0.580126453828741, "grad_norm": 0.5780946612358093, "learning_rate": 5e-05, "loss": 0.2033, "step": 1858 }, { "epoch": 0.5804386855046444, "grad_norm": 0.5985773801803589, "learning_rate": 5e-05, "loss": 0.2088, "step": 1859 }, { "epoch": 0.5807509171805479, "grad_norm": 0.5932460427284241, "learning_rate": 5e-05, "loss": 0.2156, "step": 1860 }, { "epoch": 0.5810631488564515, "grad_norm": 0.6385922431945801, "learning_rate": 5e-05, "loss": 0.2431, "step": 1861 }, { "epoch": 0.581375380532355, "grad_norm": 0.6402168869972229, "learning_rate": 5e-05, "loss": 0.2222, "step": 1862 }, { "epoch": 0.5816876122082585, "grad_norm": 0.6369123458862305, "learning_rate": 5e-05, "loss": 0.2321, "step": 1863 }, { "epoch": 0.581999843884162, "grad_norm": 0.657329797744751, "learning_rate": 5e-05, "loss": 0.2253, "step": 1864 }, { "epoch": 0.5823120755600656, "grad_norm": 0.6202287077903748, "learning_rate": 5e-05, "loss": 0.2281, "step": 1865 }, { "epoch": 0.5826243072359691, "grad_norm": 0.6161398887634277, "learning_rate": 5e-05, "loss": 0.2219, "step": 1866 }, { "epoch": 0.5829365389118726, "grad_norm": 0.6158270835876465, "learning_rate": 5e-05, "loss": 0.2124, "step": 1867 }, { "epoch": 0.5832487705877761, "grad_norm": 0.6346787810325623, "learning_rate": 5e-05, "loss": 0.2218, "step": 1868 }, { "epoch": 0.5835610022636797, "grad_norm": 0.6002383828163147, "learning_rate": 5e-05, "loss": 0.2078, "step": 1869 }, { "epoch": 0.5838732339395831, "grad_norm": 0.6429198980331421, "learning_rate": 5e-05, "loss": 0.224, "step": 1870 }, { "epoch": 0.5841854656154867, "grad_norm": 0.6232706904411316, "learning_rate": 5e-05, "loss": 0.2155, "step": 1871 }, { "epoch": 0.5844976972913902, "grad_norm": 0.575086236000061, "learning_rate": 5e-05, "loss": 0.2221, "step": 1872 }, { "epoch": 0.5848099289672938, "grad_norm": 0.6047078967094421, "learning_rate": 5e-05, "loss": 0.2114, "step": 1873 }, { "epoch": 0.5851221606431972, "grad_norm": 0.594334602355957, "learning_rate": 5e-05, "loss": 0.2125, "step": 1874 }, { "epoch": 0.5854343923191008, "grad_norm": 0.6303955316543579, "learning_rate": 5e-05, "loss": 0.2193, "step": 1875 }, { "epoch": 0.5857466239950043, "grad_norm": 0.5896738767623901, "learning_rate": 5e-05, "loss": 0.2188, "step": 1876 }, { "epoch": 0.5860588556709078, "grad_norm": 0.6137311458587646, "learning_rate": 5e-05, "loss": 0.2012, "step": 1877 }, { "epoch": 0.5863710873468113, "grad_norm": 0.668308675289154, "learning_rate": 5e-05, "loss": 0.2064, "step": 1878 }, { "epoch": 0.5866833190227149, "grad_norm": 0.6364906430244446, "learning_rate": 5e-05, "loss": 0.2217, "step": 1879 }, { "epoch": 0.5869955506986184, "grad_norm": 0.6821914911270142, "learning_rate": 5e-05, "loss": 0.2276, "step": 1880 }, { "epoch": 0.5873077823745219, "grad_norm": 0.6111158132553101, "learning_rate": 5e-05, "loss": 0.2119, "step": 1881 }, { "epoch": 0.5876200140504254, "grad_norm": 0.6069135069847107, "learning_rate": 5e-05, "loss": 0.206, "step": 1882 }, { "epoch": 0.587932245726329, "grad_norm": 0.651843786239624, "learning_rate": 5e-05, "loss": 0.2363, "step": 1883 }, { "epoch": 0.5882444774022324, "grad_norm": 0.5792482495307922, "learning_rate": 5e-05, "loss": 0.1975, "step": 1884 }, { "epoch": 0.588556709078136, "grad_norm": 0.6677501797676086, "learning_rate": 5e-05, "loss": 0.2417, "step": 1885 }, { "epoch": 0.5888689407540395, "grad_norm": 0.6503654718399048, "learning_rate": 5e-05, "loss": 0.2274, "step": 1886 }, { "epoch": 0.5891811724299431, "grad_norm": 0.6241284608840942, "learning_rate": 5e-05, "loss": 0.2212, "step": 1887 }, { "epoch": 0.5894934041058465, "grad_norm": 0.6224884986877441, "learning_rate": 5e-05, "loss": 0.2276, "step": 1888 }, { "epoch": 0.58980563578175, "grad_norm": 0.5986716151237488, "learning_rate": 5e-05, "loss": 0.2058, "step": 1889 }, { "epoch": 0.5901178674576536, "grad_norm": 0.6083580255508423, "learning_rate": 5e-05, "loss": 0.2247, "step": 1890 }, { "epoch": 0.5904300991335572, "grad_norm": 0.6352951526641846, "learning_rate": 5e-05, "loss": 0.2315, "step": 1891 }, { "epoch": 0.5907423308094606, "grad_norm": 0.6084421277046204, "learning_rate": 5e-05, "loss": 0.2233, "step": 1892 }, { "epoch": 0.5910545624853641, "grad_norm": 0.6097609996795654, "learning_rate": 5e-05, "loss": 0.2204, "step": 1893 }, { "epoch": 0.5913667941612677, "grad_norm": 0.6498511433601379, "learning_rate": 5e-05, "loss": 0.2197, "step": 1894 }, { "epoch": 0.5916790258371711, "grad_norm": 0.64594566822052, "learning_rate": 5e-05, "loss": 0.2332, "step": 1895 }, { "epoch": 0.5919912575130747, "grad_norm": 0.5998656153678894, "learning_rate": 5e-05, "loss": 0.2118, "step": 1896 }, { "epoch": 0.5923034891889782, "grad_norm": 0.6086589097976685, "learning_rate": 5e-05, "loss": 0.199, "step": 1897 }, { "epoch": 0.5926157208648818, "grad_norm": 0.6199403405189514, "learning_rate": 5e-05, "loss": 0.2199, "step": 1898 }, { "epoch": 0.5929279525407852, "grad_norm": 0.6143611073493958, "learning_rate": 5e-05, "loss": 0.2106, "step": 1899 }, { "epoch": 0.5932401842166888, "grad_norm": 0.6118437647819519, "learning_rate": 5e-05, "loss": 0.2215, "step": 1900 }, { "epoch": 0.5935524158925923, "grad_norm": 0.5885438323020935, "learning_rate": 5e-05, "loss": 0.2057, "step": 1901 }, { "epoch": 0.5938646475684958, "grad_norm": 0.6604949235916138, "learning_rate": 5e-05, "loss": 0.2309, "step": 1902 }, { "epoch": 0.5941768792443993, "grad_norm": 0.6256579756736755, "learning_rate": 5e-05, "loss": 0.2185, "step": 1903 }, { "epoch": 0.5944891109203029, "grad_norm": 0.6184607148170471, "learning_rate": 5e-05, "loss": 0.2253, "step": 1904 }, { "epoch": 0.5948013425962064, "grad_norm": 0.679724395275116, "learning_rate": 5e-05, "loss": 0.2282, "step": 1905 }, { "epoch": 0.5951135742721099, "grad_norm": 0.6274874210357666, "learning_rate": 5e-05, "loss": 0.2075, "step": 1906 }, { "epoch": 0.5954258059480134, "grad_norm": 0.6431184411048889, "learning_rate": 5e-05, "loss": 0.2215, "step": 1907 }, { "epoch": 0.595738037623917, "grad_norm": 0.6278941035270691, "learning_rate": 5e-05, "loss": 0.2269, "step": 1908 }, { "epoch": 0.5960502692998204, "grad_norm": 0.5842365026473999, "learning_rate": 5e-05, "loss": 0.2116, "step": 1909 }, { "epoch": 0.596362500975724, "grad_norm": 0.6011607646942139, "learning_rate": 5e-05, "loss": 0.2157, "step": 1910 }, { "epoch": 0.5966747326516275, "grad_norm": 0.6020752191543579, "learning_rate": 5e-05, "loss": 0.2184, "step": 1911 }, { "epoch": 0.5969869643275311, "grad_norm": 0.5844548344612122, "learning_rate": 5e-05, "loss": 0.2046, "step": 1912 }, { "epoch": 0.5972991960034345, "grad_norm": 0.6188512444496155, "learning_rate": 5e-05, "loss": 0.2251, "step": 1913 }, { "epoch": 0.5976114276793381, "grad_norm": 0.6163000464439392, "learning_rate": 5e-05, "loss": 0.2183, "step": 1914 }, { "epoch": 0.5979236593552416, "grad_norm": 0.6086611151695251, "learning_rate": 5e-05, "loss": 0.2199, "step": 1915 }, { "epoch": 0.5982358910311452, "grad_norm": 0.6269985437393188, "learning_rate": 5e-05, "loss": 0.2208, "step": 1916 }, { "epoch": 0.5985481227070486, "grad_norm": 0.6092752814292908, "learning_rate": 5e-05, "loss": 0.2124, "step": 1917 }, { "epoch": 0.5988603543829522, "grad_norm": 0.6119150519371033, "learning_rate": 5e-05, "loss": 0.2135, "step": 1918 }, { "epoch": 0.5991725860588557, "grad_norm": 0.648826003074646, "learning_rate": 5e-05, "loss": 0.2347, "step": 1919 }, { "epoch": 0.5994848177347591, "grad_norm": 0.6172620058059692, "learning_rate": 5e-05, "loss": 0.2298, "step": 1920 }, { "epoch": 0.5997970494106627, "grad_norm": 0.6419510245323181, "learning_rate": 5e-05, "loss": 0.2264, "step": 1921 }, { "epoch": 0.6001092810865662, "grad_norm": 0.6416138410568237, "learning_rate": 5e-05, "loss": 0.2354, "step": 1922 }, { "epoch": 0.6004215127624698, "grad_norm": 0.6302432417869568, "learning_rate": 5e-05, "loss": 0.2373, "step": 1923 }, { "epoch": 0.6007337444383732, "grad_norm": 0.6329814791679382, "learning_rate": 5e-05, "loss": 0.2157, "step": 1924 }, { "epoch": 0.6010459761142768, "grad_norm": 0.614093542098999, "learning_rate": 5e-05, "loss": 0.2114, "step": 1925 }, { "epoch": 0.6013582077901803, "grad_norm": 0.6403329968452454, "learning_rate": 5e-05, "loss": 0.2297, "step": 1926 }, { "epoch": 0.6016704394660838, "grad_norm": 0.6632216572761536, "learning_rate": 5e-05, "loss": 0.2272, "step": 1927 }, { "epoch": 0.6019826711419873, "grad_norm": 0.5886644721031189, "learning_rate": 5e-05, "loss": 0.2033, "step": 1928 }, { "epoch": 0.6022949028178909, "grad_norm": 0.5990208387374878, "learning_rate": 5e-05, "loss": 0.219, "step": 1929 }, { "epoch": 0.6026071344937944, "grad_norm": 0.6277127861976624, "learning_rate": 5e-05, "loss": 0.1953, "step": 1930 }, { "epoch": 0.6029193661696979, "grad_norm": 0.6209976673126221, "learning_rate": 5e-05, "loss": 0.2004, "step": 1931 }, { "epoch": 0.6032315978456014, "grad_norm": 0.6149734258651733, "learning_rate": 5e-05, "loss": 0.2311, "step": 1932 }, { "epoch": 0.603543829521505, "grad_norm": 0.6033601760864258, "learning_rate": 5e-05, "loss": 0.2264, "step": 1933 }, { "epoch": 0.6038560611974084, "grad_norm": 0.6001458168029785, "learning_rate": 5e-05, "loss": 0.2172, "step": 1934 }, { "epoch": 0.604168292873312, "grad_norm": 0.621783971786499, "learning_rate": 5e-05, "loss": 0.2047, "step": 1935 }, { "epoch": 0.6044805245492155, "grad_norm": 0.6147360801696777, "learning_rate": 5e-05, "loss": 0.2166, "step": 1936 }, { "epoch": 0.6047927562251191, "grad_norm": 0.6096742153167725, "learning_rate": 5e-05, "loss": 0.2185, "step": 1937 }, { "epoch": 0.6051049879010225, "grad_norm": 0.6072768568992615, "learning_rate": 5e-05, "loss": 0.2109, "step": 1938 }, { "epoch": 0.6054172195769261, "grad_norm": 0.5980080366134644, "learning_rate": 5e-05, "loss": 0.2169, "step": 1939 }, { "epoch": 0.6057294512528296, "grad_norm": 0.5740246772766113, "learning_rate": 5e-05, "loss": 0.208, "step": 1940 }, { "epoch": 0.6060416829287332, "grad_norm": 0.6334108710289001, "learning_rate": 5e-05, "loss": 0.2286, "step": 1941 }, { "epoch": 0.6063539146046366, "grad_norm": 0.5939199328422546, "learning_rate": 5e-05, "loss": 0.199, "step": 1942 }, { "epoch": 0.6066661462805402, "grad_norm": 0.6147668957710266, "learning_rate": 5e-05, "loss": 0.2171, "step": 1943 }, { "epoch": 0.6069783779564437, "grad_norm": 0.6269360780715942, "learning_rate": 5e-05, "loss": 0.2238, "step": 1944 }, { "epoch": 0.6072906096323472, "grad_norm": 0.6227685809135437, "learning_rate": 5e-05, "loss": 0.2244, "step": 1945 }, { "epoch": 0.6076028413082507, "grad_norm": 0.696974515914917, "learning_rate": 5e-05, "loss": 0.2268, "step": 1946 }, { "epoch": 0.6079150729841543, "grad_norm": 0.6275862455368042, "learning_rate": 5e-05, "loss": 0.2218, "step": 1947 }, { "epoch": 0.6082273046600578, "grad_norm": 0.628684937953949, "learning_rate": 5e-05, "loss": 0.2371, "step": 1948 }, { "epoch": 0.6085395363359613, "grad_norm": 0.6163367629051208, "learning_rate": 5e-05, "loss": 0.2174, "step": 1949 }, { "epoch": 0.6088517680118648, "grad_norm": 0.5646088123321533, "learning_rate": 5e-05, "loss": 0.2062, "step": 1950 }, { "epoch": 0.6091639996877684, "grad_norm": 0.6219223141670227, "learning_rate": 5e-05, "loss": 0.2166, "step": 1951 }, { "epoch": 0.6094762313636718, "grad_norm": 0.6534251570701599, "learning_rate": 5e-05, "loss": 0.2297, "step": 1952 }, { "epoch": 0.6097884630395753, "grad_norm": 0.6334348917007446, "learning_rate": 5e-05, "loss": 0.2239, "step": 1953 }, { "epoch": 0.6101006947154789, "grad_norm": 0.6267979741096497, "learning_rate": 5e-05, "loss": 0.2331, "step": 1954 }, { "epoch": 0.6104129263913824, "grad_norm": 0.601295530796051, "learning_rate": 5e-05, "loss": 0.2326, "step": 1955 }, { "epoch": 0.6107251580672859, "grad_norm": 0.6141254305839539, "learning_rate": 5e-05, "loss": 0.2147, "step": 1956 }, { "epoch": 0.6110373897431894, "grad_norm": 0.6251623630523682, "learning_rate": 5e-05, "loss": 0.2286, "step": 1957 }, { "epoch": 0.611349621419093, "grad_norm": 0.6560406684875488, "learning_rate": 5e-05, "loss": 0.2263, "step": 1958 }, { "epoch": 0.6116618530949964, "grad_norm": 0.6436022520065308, "learning_rate": 5e-05, "loss": 0.2315, "step": 1959 }, { "epoch": 0.6119740847709, "grad_norm": 0.648205041885376, "learning_rate": 5e-05, "loss": 0.2184, "step": 1960 }, { "epoch": 0.6122863164468035, "grad_norm": 0.6358290910720825, "learning_rate": 5e-05, "loss": 0.2324, "step": 1961 }, { "epoch": 0.6125985481227071, "grad_norm": 0.6527628302574158, "learning_rate": 5e-05, "loss": 0.2083, "step": 1962 }, { "epoch": 0.6129107797986105, "grad_norm": 0.6363287568092346, "learning_rate": 5e-05, "loss": 0.2281, "step": 1963 }, { "epoch": 0.6132230114745141, "grad_norm": 0.6221181750297546, "learning_rate": 5e-05, "loss": 0.2194, "step": 1964 }, { "epoch": 0.6135352431504176, "grad_norm": 0.6285766363143921, "learning_rate": 5e-05, "loss": 0.2309, "step": 1965 }, { "epoch": 0.6138474748263212, "grad_norm": 0.588305652141571, "learning_rate": 5e-05, "loss": 0.22, "step": 1966 }, { "epoch": 0.6141597065022246, "grad_norm": 0.6026490330696106, "learning_rate": 5e-05, "loss": 0.2074, "step": 1967 }, { "epoch": 0.6144719381781282, "grad_norm": 0.6497485041618347, "learning_rate": 5e-05, "loss": 0.2327, "step": 1968 }, { "epoch": 0.6147841698540317, "grad_norm": 0.6335916519165039, "learning_rate": 5e-05, "loss": 0.2234, "step": 1969 }, { "epoch": 0.6150964015299352, "grad_norm": 0.6472461819648743, "learning_rate": 5e-05, "loss": 0.2071, "step": 1970 }, { "epoch": 0.6154086332058387, "grad_norm": 0.6093105673789978, "learning_rate": 5e-05, "loss": 0.2156, "step": 1971 }, { "epoch": 0.6157208648817423, "grad_norm": 0.608284056186676, "learning_rate": 5e-05, "loss": 0.2106, "step": 1972 }, { "epoch": 0.6160330965576458, "grad_norm": 0.6344753503799438, "learning_rate": 5e-05, "loss": 0.2222, "step": 1973 }, { "epoch": 0.6163453282335493, "grad_norm": 0.5923776030540466, "learning_rate": 5e-05, "loss": 0.2158, "step": 1974 }, { "epoch": 0.6166575599094528, "grad_norm": 0.6419810056686401, "learning_rate": 5e-05, "loss": 0.2187, "step": 1975 }, { "epoch": 0.6169697915853564, "grad_norm": 0.6153873205184937, "learning_rate": 5e-05, "loss": 0.2245, "step": 1976 }, { "epoch": 0.6172820232612598, "grad_norm": 0.6331546306610107, "learning_rate": 5e-05, "loss": 0.1974, "step": 1977 }, { "epoch": 0.6175942549371634, "grad_norm": 0.6640638113021851, "learning_rate": 5e-05, "loss": 0.2284, "step": 1978 }, { "epoch": 0.6179064866130669, "grad_norm": 0.6411094665527344, "learning_rate": 5e-05, "loss": 0.227, "step": 1979 }, { "epoch": 0.6182187182889705, "grad_norm": 0.6183809638023376, "learning_rate": 5e-05, "loss": 0.2289, "step": 1980 }, { "epoch": 0.6185309499648739, "grad_norm": 0.5985243320465088, "learning_rate": 5e-05, "loss": 0.2122, "step": 1981 }, { "epoch": 0.6188431816407775, "grad_norm": 0.6367735862731934, "learning_rate": 5e-05, "loss": 0.2203, "step": 1982 }, { "epoch": 0.619155413316681, "grad_norm": 0.6082991361618042, "learning_rate": 5e-05, "loss": 0.2237, "step": 1983 }, { "epoch": 0.6194676449925844, "grad_norm": 0.6175509691238403, "learning_rate": 5e-05, "loss": 0.2043, "step": 1984 }, { "epoch": 0.619779876668488, "grad_norm": 0.6226653456687927, "learning_rate": 5e-05, "loss": 0.2114, "step": 1985 }, { "epoch": 0.6200921083443915, "grad_norm": 0.6079607009887695, "learning_rate": 5e-05, "loss": 0.2238, "step": 1986 }, { "epoch": 0.6204043400202951, "grad_norm": 0.6429606676101685, "learning_rate": 5e-05, "loss": 0.2173, "step": 1987 }, { "epoch": 0.6207165716961985, "grad_norm": 0.6062070727348328, "learning_rate": 5e-05, "loss": 0.2076, "step": 1988 }, { "epoch": 0.6210288033721021, "grad_norm": 0.6488637328147888, "learning_rate": 5e-05, "loss": 0.2346, "step": 1989 }, { "epoch": 0.6213410350480056, "grad_norm": 0.6055715680122375, "learning_rate": 5e-05, "loss": 0.2177, "step": 1990 }, { "epoch": 0.6216532667239092, "grad_norm": 0.6091405749320984, "learning_rate": 5e-05, "loss": 0.2197, "step": 1991 }, { "epoch": 0.6219654983998126, "grad_norm": 0.5656712651252747, "learning_rate": 5e-05, "loss": 0.1865, "step": 1992 }, { "epoch": 0.6222777300757162, "grad_norm": 0.6383893489837646, "learning_rate": 5e-05, "loss": 0.2264, "step": 1993 }, { "epoch": 0.6225899617516197, "grad_norm": 0.6299546957015991, "learning_rate": 5e-05, "loss": 0.229, "step": 1994 }, { "epoch": 0.6229021934275232, "grad_norm": 0.6065727472305298, "learning_rate": 5e-05, "loss": 0.2081, "step": 1995 }, { "epoch": 0.6232144251034267, "grad_norm": 0.6377463936805725, "learning_rate": 5e-05, "loss": 0.2249, "step": 1996 }, { "epoch": 0.6235266567793303, "grad_norm": 0.6016868352890015, "learning_rate": 5e-05, "loss": 0.2126, "step": 1997 }, { "epoch": 0.6238388884552338, "grad_norm": 0.6183316707611084, "learning_rate": 5e-05, "loss": 0.2305, "step": 1998 }, { "epoch": 0.6241511201311373, "grad_norm": 0.6456790566444397, "learning_rate": 5e-05, "loss": 0.2351, "step": 1999 }, { "epoch": 0.6244633518070408, "grad_norm": 0.5953778624534607, "learning_rate": 5e-05, "loss": 0.2206, "step": 2000 }, { "epoch": 0.6247755834829444, "grad_norm": 0.6407880187034607, "learning_rate": 5e-05, "loss": 0.219, "step": 2001 }, { "epoch": 0.6250878151588478, "grad_norm": 0.6122270822525024, "learning_rate": 5e-05, "loss": 0.2145, "step": 2002 }, { "epoch": 0.6254000468347514, "grad_norm": 0.6321791410446167, "learning_rate": 5e-05, "loss": 0.2312, "step": 2003 }, { "epoch": 0.6257122785106549, "grad_norm": 0.5793911218643188, "learning_rate": 5e-05, "loss": 0.1954, "step": 2004 }, { "epoch": 0.6260245101865585, "grad_norm": 0.6148756742477417, "learning_rate": 5e-05, "loss": 0.2024, "step": 2005 }, { "epoch": 0.6263367418624619, "grad_norm": 0.6203504800796509, "learning_rate": 5e-05, "loss": 0.2122, "step": 2006 }, { "epoch": 0.6266489735383655, "grad_norm": 0.6258541941642761, "learning_rate": 5e-05, "loss": 0.2157, "step": 2007 }, { "epoch": 0.626961205214269, "grad_norm": 0.6195490956306458, "learning_rate": 5e-05, "loss": 0.2037, "step": 2008 }, { "epoch": 0.6272734368901725, "grad_norm": 0.5840429067611694, "learning_rate": 5e-05, "loss": 0.2072, "step": 2009 }, { "epoch": 0.627585668566076, "grad_norm": 0.6173880100250244, "learning_rate": 5e-05, "loss": 0.2174, "step": 2010 }, { "epoch": 0.6278979002419796, "grad_norm": 0.6773568391799927, "learning_rate": 5e-05, "loss": 0.2335, "step": 2011 }, { "epoch": 0.6282101319178831, "grad_norm": 0.6167200803756714, "learning_rate": 5e-05, "loss": 0.213, "step": 2012 }, { "epoch": 0.6285223635937865, "grad_norm": 0.6247929930686951, "learning_rate": 5e-05, "loss": 0.22, "step": 2013 }, { "epoch": 0.6288345952696901, "grad_norm": 0.6001771092414856, "learning_rate": 5e-05, "loss": 0.2186, "step": 2014 }, { "epoch": 0.6291468269455937, "grad_norm": 0.6226188540458679, "learning_rate": 5e-05, "loss": 0.2341, "step": 2015 }, { "epoch": 0.6294590586214972, "grad_norm": 0.611708402633667, "learning_rate": 5e-05, "loss": 0.2102, "step": 2016 }, { "epoch": 0.6297712902974006, "grad_norm": 0.6221734285354614, "learning_rate": 5e-05, "loss": 0.2195, "step": 2017 }, { "epoch": 0.6300835219733042, "grad_norm": 0.5948175191879272, "learning_rate": 5e-05, "loss": 0.2181, "step": 2018 }, { "epoch": 0.6303957536492077, "grad_norm": 0.6374267339706421, "learning_rate": 5e-05, "loss": 0.2274, "step": 2019 }, { "epoch": 0.6307079853251112, "grad_norm": 0.6273247599601746, "learning_rate": 5e-05, "loss": 0.2195, "step": 2020 }, { "epoch": 0.6310202170010147, "grad_norm": 0.5986998081207275, "learning_rate": 5e-05, "loss": 0.2147, "step": 2021 }, { "epoch": 0.6313324486769183, "grad_norm": 0.6311092972755432, "learning_rate": 5e-05, "loss": 0.2287, "step": 2022 }, { "epoch": 0.6316446803528218, "grad_norm": 0.6677855849266052, "learning_rate": 5e-05, "loss": 0.2405, "step": 2023 }, { "epoch": 0.6319569120287253, "grad_norm": 0.6415626406669617, "learning_rate": 5e-05, "loss": 0.2284, "step": 2024 }, { "epoch": 0.6322691437046288, "grad_norm": 0.5992298722267151, "learning_rate": 5e-05, "loss": 0.2052, "step": 2025 }, { "epoch": 0.6325813753805324, "grad_norm": 0.6199409365653992, "learning_rate": 5e-05, "loss": 0.2105, "step": 2026 }, { "epoch": 0.6328936070564358, "grad_norm": 0.6127330660820007, "learning_rate": 5e-05, "loss": 0.2236, "step": 2027 }, { "epoch": 0.6332058387323394, "grad_norm": 0.6045218110084534, "learning_rate": 5e-05, "loss": 0.2072, "step": 2028 }, { "epoch": 0.6335180704082429, "grad_norm": 0.636313796043396, "learning_rate": 5e-05, "loss": 0.2061, "step": 2029 }, { "epoch": 0.6338303020841465, "grad_norm": 0.6241464018821716, "learning_rate": 5e-05, "loss": 0.2076, "step": 2030 }, { "epoch": 0.6341425337600499, "grad_norm": 0.6224902868270874, "learning_rate": 5e-05, "loss": 0.2285, "step": 2031 }, { "epoch": 0.6344547654359535, "grad_norm": 0.6099748611450195, "learning_rate": 5e-05, "loss": 0.217, "step": 2032 }, { "epoch": 0.634766997111857, "grad_norm": 0.6225326657295227, "learning_rate": 5e-05, "loss": 0.2002, "step": 2033 }, { "epoch": 0.6350792287877605, "grad_norm": 0.6239826083183289, "learning_rate": 5e-05, "loss": 0.2174, "step": 2034 }, { "epoch": 0.635391460463664, "grad_norm": 0.6447561383247375, "learning_rate": 5e-05, "loss": 0.234, "step": 2035 }, { "epoch": 0.6357036921395676, "grad_norm": 0.6467114686965942, "learning_rate": 5e-05, "loss": 0.2283, "step": 2036 }, { "epoch": 0.6360159238154711, "grad_norm": 0.5905013084411621, "learning_rate": 5e-05, "loss": 0.2083, "step": 2037 }, { "epoch": 0.6363281554913746, "grad_norm": 0.6484181880950928, "learning_rate": 5e-05, "loss": 0.2188, "step": 2038 }, { "epoch": 0.6366403871672781, "grad_norm": 0.660306990146637, "learning_rate": 5e-05, "loss": 0.2227, "step": 2039 }, { "epoch": 0.6369526188431817, "grad_norm": 0.5727385878562927, "learning_rate": 5e-05, "loss": 0.1969, "step": 2040 }, { "epoch": 0.6372648505190852, "grad_norm": 0.6384777426719666, "learning_rate": 5e-05, "loss": 0.204, "step": 2041 }, { "epoch": 0.6375770821949887, "grad_norm": 0.63572096824646, "learning_rate": 5e-05, "loss": 0.2297, "step": 2042 }, { "epoch": 0.6378893138708922, "grad_norm": 0.6303291916847229, "learning_rate": 5e-05, "loss": 0.2132, "step": 2043 }, { "epoch": 0.6382015455467958, "grad_norm": 0.6186389327049255, "learning_rate": 5e-05, "loss": 0.2235, "step": 2044 }, { "epoch": 0.6385137772226992, "grad_norm": 0.5936133861541748, "learning_rate": 5e-05, "loss": 0.2091, "step": 2045 }, { "epoch": 0.6388260088986027, "grad_norm": 0.6189283132553101, "learning_rate": 5e-05, "loss": 0.2227, "step": 2046 }, { "epoch": 0.6391382405745063, "grad_norm": 0.6242743134498596, "learning_rate": 5e-05, "loss": 0.2257, "step": 2047 }, { "epoch": 0.6394504722504099, "grad_norm": 0.5832568407058716, "learning_rate": 5e-05, "loss": 0.2149, "step": 2048 }, { "epoch": 0.6397627039263133, "grad_norm": 0.5953076481819153, "learning_rate": 5e-05, "loss": 0.2137, "step": 2049 }, { "epoch": 0.6400749356022168, "grad_norm": 0.6213819980621338, "learning_rate": 5e-05, "loss": 0.2227, "step": 2050 }, { "epoch": 0.6403871672781204, "grad_norm": 0.6034430265426636, "learning_rate": 5e-05, "loss": 0.1968, "step": 2051 }, { "epoch": 0.6406993989540238, "grad_norm": 0.6628347039222717, "learning_rate": 5e-05, "loss": 0.2387, "step": 2052 }, { "epoch": 0.6410116306299274, "grad_norm": 0.6183295845985413, "learning_rate": 5e-05, "loss": 0.2219, "step": 2053 }, { "epoch": 0.6413238623058309, "grad_norm": 0.6827853918075562, "learning_rate": 5e-05, "loss": 0.2545, "step": 2054 }, { "epoch": 0.6416360939817345, "grad_norm": 0.6011988520622253, "learning_rate": 5e-05, "loss": 0.2154, "step": 2055 }, { "epoch": 0.6419483256576379, "grad_norm": 0.649394154548645, "learning_rate": 5e-05, "loss": 0.217, "step": 2056 }, { "epoch": 0.6422605573335415, "grad_norm": 0.5943832397460938, "learning_rate": 5e-05, "loss": 0.2013, "step": 2057 }, { "epoch": 0.642572789009445, "grad_norm": 0.6125427484512329, "learning_rate": 5e-05, "loss": 0.2081, "step": 2058 }, { "epoch": 0.6428850206853485, "grad_norm": 0.6137369871139526, "learning_rate": 5e-05, "loss": 0.2136, "step": 2059 }, { "epoch": 0.643197252361252, "grad_norm": 0.606317937374115, "learning_rate": 5e-05, "loss": 0.2112, "step": 2060 }, { "epoch": 0.6435094840371556, "grad_norm": 0.6443127989768982, "learning_rate": 5e-05, "loss": 0.2191, "step": 2061 }, { "epoch": 0.6438217157130591, "grad_norm": 0.6450709104537964, "learning_rate": 5e-05, "loss": 0.1981, "step": 2062 }, { "epoch": 0.6441339473889626, "grad_norm": 0.6202965974807739, "learning_rate": 5e-05, "loss": 0.2168, "step": 2063 }, { "epoch": 0.6444461790648661, "grad_norm": 0.6637272834777832, "learning_rate": 5e-05, "loss": 0.2371, "step": 2064 }, { "epoch": 0.6447584107407697, "grad_norm": 0.6166815757751465, "learning_rate": 5e-05, "loss": 0.2032, "step": 2065 }, { "epoch": 0.6450706424166732, "grad_norm": 0.6204001307487488, "learning_rate": 5e-05, "loss": 0.222, "step": 2066 }, { "epoch": 0.6453828740925767, "grad_norm": 0.6601096391677856, "learning_rate": 5e-05, "loss": 0.2231, "step": 2067 }, { "epoch": 0.6456951057684802, "grad_norm": 0.6024062633514404, "learning_rate": 5e-05, "loss": 0.2011, "step": 2068 }, { "epoch": 0.6460073374443838, "grad_norm": 0.6432746648788452, "learning_rate": 5e-05, "loss": 0.2488, "step": 2069 }, { "epoch": 0.6463195691202872, "grad_norm": 0.6029177308082581, "learning_rate": 5e-05, "loss": 0.2072, "step": 2070 }, { "epoch": 0.6466318007961908, "grad_norm": 0.6360803246498108, "learning_rate": 5e-05, "loss": 0.2335, "step": 2071 }, { "epoch": 0.6469440324720943, "grad_norm": 0.6396716237068176, "learning_rate": 5e-05, "loss": 0.2238, "step": 2072 }, { "epoch": 0.6472562641479979, "grad_norm": 0.6031143069267273, "learning_rate": 5e-05, "loss": 0.2117, "step": 2073 }, { "epoch": 0.6475684958239013, "grad_norm": 0.5922034978866577, "learning_rate": 5e-05, "loss": 0.2193, "step": 2074 }, { "epoch": 0.6478807274998049, "grad_norm": 0.6164787411689758, "learning_rate": 5e-05, "loss": 0.2115, "step": 2075 }, { "epoch": 0.6481929591757084, "grad_norm": 0.622136116027832, "learning_rate": 5e-05, "loss": 0.2228, "step": 2076 }, { "epoch": 0.6485051908516118, "grad_norm": 0.6147032380104065, "learning_rate": 5e-05, "loss": 0.2149, "step": 2077 }, { "epoch": 0.6488174225275154, "grad_norm": 0.6010338068008423, "learning_rate": 5e-05, "loss": 0.225, "step": 2078 }, { "epoch": 0.649129654203419, "grad_norm": 0.6261254549026489, "learning_rate": 5e-05, "loss": 0.2146, "step": 2079 }, { "epoch": 0.6494418858793225, "grad_norm": 0.6106892824172974, "learning_rate": 5e-05, "loss": 0.2201, "step": 2080 }, { "epoch": 0.6497541175552259, "grad_norm": 0.6031441688537598, "learning_rate": 5e-05, "loss": 0.2063, "step": 2081 }, { "epoch": 0.6500663492311295, "grad_norm": 0.6498478055000305, "learning_rate": 5e-05, "loss": 0.2231, "step": 2082 }, { "epoch": 0.650378580907033, "grad_norm": 0.6274979710578918, "learning_rate": 5e-05, "loss": 0.2245, "step": 2083 }, { "epoch": 0.6506908125829365, "grad_norm": 0.6357938647270203, "learning_rate": 5e-05, "loss": 0.2088, "step": 2084 }, { "epoch": 0.65100304425884, "grad_norm": 0.6795604825019836, "learning_rate": 5e-05, "loss": 0.2441, "step": 2085 }, { "epoch": 0.6513152759347436, "grad_norm": 0.6396167874336243, "learning_rate": 5e-05, "loss": 0.2161, "step": 2086 }, { "epoch": 0.6516275076106471, "grad_norm": 0.6302040815353394, "learning_rate": 5e-05, "loss": 0.2087, "step": 2087 }, { "epoch": 0.6519397392865506, "grad_norm": 0.6229538321495056, "learning_rate": 5e-05, "loss": 0.209, "step": 2088 }, { "epoch": 0.6522519709624541, "grad_norm": 0.6060777306556702, "learning_rate": 5e-05, "loss": 0.2219, "step": 2089 }, { "epoch": 0.6525642026383577, "grad_norm": 0.6480130553245544, "learning_rate": 5e-05, "loss": 0.2352, "step": 2090 }, { "epoch": 0.6528764343142612, "grad_norm": 0.6274617910385132, "learning_rate": 5e-05, "loss": 0.2225, "step": 2091 }, { "epoch": 0.6531886659901647, "grad_norm": 0.6800685524940491, "learning_rate": 5e-05, "loss": 0.2296, "step": 2092 }, { "epoch": 0.6535008976660682, "grad_norm": 0.5766170024871826, "learning_rate": 5e-05, "loss": 0.2056, "step": 2093 }, { "epoch": 0.6538131293419718, "grad_norm": 0.6182262897491455, "learning_rate": 5e-05, "loss": 0.2262, "step": 2094 }, { "epoch": 0.6541253610178752, "grad_norm": 0.6113260984420776, "learning_rate": 5e-05, "loss": 0.2301, "step": 2095 }, { "epoch": 0.6544375926937788, "grad_norm": 0.6263765692710876, "learning_rate": 5e-05, "loss": 0.217, "step": 2096 }, { "epoch": 0.6547498243696823, "grad_norm": 0.6613492369651794, "learning_rate": 5e-05, "loss": 0.2193, "step": 2097 }, { "epoch": 0.6550620560455859, "grad_norm": 0.5936593413352966, "learning_rate": 5e-05, "loss": 0.2109, "step": 2098 }, { "epoch": 0.6553742877214893, "grad_norm": 0.6313296556472778, "learning_rate": 5e-05, "loss": 0.2253, "step": 2099 }, { "epoch": 0.6556865193973929, "grad_norm": 0.6119645833969116, "learning_rate": 5e-05, "loss": 0.2285, "step": 2100 }, { "epoch": 0.6559987510732964, "grad_norm": 0.6035255193710327, "learning_rate": 5e-05, "loss": 0.2247, "step": 2101 }, { "epoch": 0.6563109827491999, "grad_norm": 0.5859045386314392, "learning_rate": 5e-05, "loss": 0.194, "step": 2102 }, { "epoch": 0.6566232144251034, "grad_norm": 0.6390708088874817, "learning_rate": 5e-05, "loss": 0.2096, "step": 2103 }, { "epoch": 0.656935446101007, "grad_norm": 0.6191269755363464, "learning_rate": 5e-05, "loss": 0.2296, "step": 2104 }, { "epoch": 0.6572476777769105, "grad_norm": 0.6057095527648926, "learning_rate": 5e-05, "loss": 0.2203, "step": 2105 }, { "epoch": 0.657559909452814, "grad_norm": 0.6239784359931946, "learning_rate": 5e-05, "loss": 0.2307, "step": 2106 }, { "epoch": 0.6578721411287175, "grad_norm": 0.6309630870819092, "learning_rate": 5e-05, "loss": 0.2235, "step": 2107 }, { "epoch": 0.658184372804621, "grad_norm": 0.6289532780647278, "learning_rate": 5e-05, "loss": 0.2206, "step": 2108 }, { "epoch": 0.6584966044805245, "grad_norm": 0.6462743282318115, "learning_rate": 5e-05, "loss": 0.2248, "step": 2109 }, { "epoch": 0.658808836156428, "grad_norm": 0.6118273735046387, "learning_rate": 5e-05, "loss": 0.2177, "step": 2110 }, { "epoch": 0.6591210678323316, "grad_norm": 0.6410567164421082, "learning_rate": 5e-05, "loss": 0.2323, "step": 2111 }, { "epoch": 0.6594332995082351, "grad_norm": 0.6050724983215332, "learning_rate": 5e-05, "loss": 0.2204, "step": 2112 }, { "epoch": 0.6597455311841386, "grad_norm": 0.5914698839187622, "learning_rate": 5e-05, "loss": 0.2023, "step": 2113 }, { "epoch": 0.6600577628600421, "grad_norm": 0.6221479773521423, "learning_rate": 5e-05, "loss": 0.2229, "step": 2114 }, { "epoch": 0.6603699945359457, "grad_norm": 0.6099815964698792, "learning_rate": 5e-05, "loss": 0.2363, "step": 2115 }, { "epoch": 0.6606822262118492, "grad_norm": 0.6299190521240234, "learning_rate": 5e-05, "loss": 0.2304, "step": 2116 }, { "epoch": 0.6609944578877527, "grad_norm": 0.6209882497787476, "learning_rate": 5e-05, "loss": 0.2229, "step": 2117 }, { "epoch": 0.6613066895636562, "grad_norm": 0.6051134467124939, "learning_rate": 5e-05, "loss": 0.2138, "step": 2118 }, { "epoch": 0.6616189212395598, "grad_norm": 0.5885667204856873, "learning_rate": 5e-05, "loss": 0.2054, "step": 2119 }, { "epoch": 0.6619311529154632, "grad_norm": 0.5941891074180603, "learning_rate": 5e-05, "loss": 0.207, "step": 2120 }, { "epoch": 0.6622433845913668, "grad_norm": 0.6231564283370972, "learning_rate": 5e-05, "loss": 0.2141, "step": 2121 }, { "epoch": 0.6625556162672703, "grad_norm": 0.551883339881897, "learning_rate": 5e-05, "loss": 0.1935, "step": 2122 }, { "epoch": 0.6628678479431739, "grad_norm": 0.6070153117179871, "learning_rate": 5e-05, "loss": 0.2144, "step": 2123 }, { "epoch": 0.6631800796190773, "grad_norm": 0.6057989001274109, "learning_rate": 5e-05, "loss": 0.2221, "step": 2124 }, { "epoch": 0.6634923112949809, "grad_norm": 0.6084137558937073, "learning_rate": 5e-05, "loss": 0.2066, "step": 2125 }, { "epoch": 0.6638045429708844, "grad_norm": 0.6299135684967041, "learning_rate": 5e-05, "loss": 0.2273, "step": 2126 }, { "epoch": 0.6641167746467879, "grad_norm": 0.6807795763015747, "learning_rate": 5e-05, "loss": 0.2495, "step": 2127 }, { "epoch": 0.6644290063226914, "grad_norm": 0.6321357488632202, "learning_rate": 5e-05, "loss": 0.2415, "step": 2128 }, { "epoch": 0.664741237998595, "grad_norm": 0.6546749472618103, "learning_rate": 5e-05, "loss": 0.2179, "step": 2129 }, { "epoch": 0.6650534696744985, "grad_norm": 0.6037410497665405, "learning_rate": 5e-05, "loss": 0.2046, "step": 2130 }, { "epoch": 0.665365701350402, "grad_norm": 0.5991830825805664, "learning_rate": 5e-05, "loss": 0.2177, "step": 2131 }, { "epoch": 0.6656779330263055, "grad_norm": 0.6283982396125793, "learning_rate": 5e-05, "loss": 0.2233, "step": 2132 }, { "epoch": 0.6659901647022091, "grad_norm": 0.6229997873306274, "learning_rate": 5e-05, "loss": 0.2142, "step": 2133 }, { "epoch": 0.6663023963781125, "grad_norm": 0.6688320636749268, "learning_rate": 5e-05, "loss": 0.2223, "step": 2134 }, { "epoch": 0.6666146280540161, "grad_norm": 0.5852001309394836, "learning_rate": 5e-05, "loss": 0.2196, "step": 2135 }, { "epoch": 0.6669268597299196, "grad_norm": 0.6032058000564575, "learning_rate": 5e-05, "loss": 0.2279, "step": 2136 }, { "epoch": 0.6672390914058232, "grad_norm": 0.6171314120292664, "learning_rate": 5e-05, "loss": 0.2031, "step": 2137 }, { "epoch": 0.6675513230817266, "grad_norm": 0.6448203325271606, "learning_rate": 5e-05, "loss": 0.2029, "step": 2138 }, { "epoch": 0.6678635547576302, "grad_norm": 0.6346386075019836, "learning_rate": 5e-05, "loss": 0.222, "step": 2139 }, { "epoch": 0.6681757864335337, "grad_norm": 0.6268606185913086, "learning_rate": 5e-05, "loss": 0.2296, "step": 2140 }, { "epoch": 0.6684880181094373, "grad_norm": 0.6135911345481873, "learning_rate": 5e-05, "loss": 0.2117, "step": 2141 }, { "epoch": 0.6688002497853407, "grad_norm": 0.5813359022140503, "learning_rate": 5e-05, "loss": 0.2083, "step": 2142 }, { "epoch": 0.6691124814612442, "grad_norm": 0.6231116056442261, "learning_rate": 5e-05, "loss": 0.2149, "step": 2143 }, { "epoch": 0.6694247131371478, "grad_norm": 0.6077895164489746, "learning_rate": 5e-05, "loss": 0.2199, "step": 2144 }, { "epoch": 0.6697369448130512, "grad_norm": 0.5928354263305664, "learning_rate": 5e-05, "loss": 0.2035, "step": 2145 }, { "epoch": 0.6700491764889548, "grad_norm": 0.6168010830879211, "learning_rate": 5e-05, "loss": 0.2218, "step": 2146 }, { "epoch": 0.6703614081648583, "grad_norm": 0.5769188404083252, "learning_rate": 5e-05, "loss": 0.1983, "step": 2147 }, { "epoch": 0.6706736398407619, "grad_norm": 0.60873943567276, "learning_rate": 5e-05, "loss": 0.1993, "step": 2148 }, { "epoch": 0.6709858715166653, "grad_norm": 0.6506749987602234, "learning_rate": 5e-05, "loss": 0.2241, "step": 2149 }, { "epoch": 0.6712981031925689, "grad_norm": 0.6177363991737366, "learning_rate": 5e-05, "loss": 0.2114, "step": 2150 }, { "epoch": 0.6716103348684724, "grad_norm": 0.5633018612861633, "learning_rate": 5e-05, "loss": 0.1914, "step": 2151 }, { "epoch": 0.6719225665443759, "grad_norm": 0.6020506024360657, "learning_rate": 5e-05, "loss": 0.2164, "step": 2152 }, { "epoch": 0.6722347982202794, "grad_norm": 0.632338285446167, "learning_rate": 5e-05, "loss": 0.2306, "step": 2153 }, { "epoch": 0.672547029896183, "grad_norm": 0.6408675312995911, "learning_rate": 5e-05, "loss": 0.244, "step": 2154 }, { "epoch": 0.6728592615720865, "grad_norm": 0.5820866823196411, "learning_rate": 5e-05, "loss": 0.1998, "step": 2155 }, { "epoch": 0.67317149324799, "grad_norm": 0.5929029583930969, "learning_rate": 5e-05, "loss": 0.216, "step": 2156 }, { "epoch": 0.6734837249238935, "grad_norm": 0.5830338001251221, "learning_rate": 5e-05, "loss": 0.213, "step": 2157 }, { "epoch": 0.6737959565997971, "grad_norm": 0.6678400039672852, "learning_rate": 5e-05, "loss": 0.2363, "step": 2158 }, { "epoch": 0.6741081882757005, "grad_norm": 0.6339504718780518, "learning_rate": 5e-05, "loss": 0.2206, "step": 2159 }, { "epoch": 0.6744204199516041, "grad_norm": 0.6033822298049927, "learning_rate": 5e-05, "loss": 0.2085, "step": 2160 }, { "epoch": 0.6747326516275076, "grad_norm": 0.6240803599357605, "learning_rate": 5e-05, "loss": 0.2197, "step": 2161 }, { "epoch": 0.6750448833034112, "grad_norm": 0.6161998510360718, "learning_rate": 5e-05, "loss": 0.2167, "step": 2162 }, { "epoch": 0.6753571149793146, "grad_norm": 0.5984218716621399, "learning_rate": 5e-05, "loss": 0.2004, "step": 2163 }, { "epoch": 0.6756693466552182, "grad_norm": 0.6278201341629028, "learning_rate": 5e-05, "loss": 0.2058, "step": 2164 }, { "epoch": 0.6759815783311217, "grad_norm": 0.6065270900726318, "learning_rate": 5e-05, "loss": 0.2152, "step": 2165 }, { "epoch": 0.6762938100070253, "grad_norm": 0.590021014213562, "learning_rate": 5e-05, "loss": 0.211, "step": 2166 }, { "epoch": 0.6766060416829287, "grad_norm": 0.6180009245872498, "learning_rate": 5e-05, "loss": 0.2104, "step": 2167 }, { "epoch": 0.6769182733588323, "grad_norm": 0.6032723188400269, "learning_rate": 5e-05, "loss": 0.205, "step": 2168 }, { "epoch": 0.6772305050347358, "grad_norm": 0.6490362286567688, "learning_rate": 5e-05, "loss": 0.2315, "step": 2169 }, { "epoch": 0.6775427367106392, "grad_norm": 0.6440135836601257, "learning_rate": 5e-05, "loss": 0.2125, "step": 2170 }, { "epoch": 0.6778549683865428, "grad_norm": 0.6124927401542664, "learning_rate": 5e-05, "loss": 0.2215, "step": 2171 }, { "epoch": 0.6781672000624464, "grad_norm": 0.6875654458999634, "learning_rate": 5e-05, "loss": 0.2401, "step": 2172 }, { "epoch": 0.6784794317383499, "grad_norm": 0.64641273021698, "learning_rate": 5e-05, "loss": 0.2126, "step": 2173 }, { "epoch": 0.6787916634142533, "grad_norm": 0.6416630744934082, "learning_rate": 5e-05, "loss": 0.2334, "step": 2174 }, { "epoch": 0.6791038950901569, "grad_norm": 0.6606221795082092, "learning_rate": 5e-05, "loss": 0.2253, "step": 2175 }, { "epoch": 0.6794161267660604, "grad_norm": 0.6140896677970886, "learning_rate": 5e-05, "loss": 0.2264, "step": 2176 }, { "epoch": 0.6797283584419639, "grad_norm": 0.6064491271972656, "learning_rate": 5e-05, "loss": 0.2038, "step": 2177 }, { "epoch": 0.6800405901178674, "grad_norm": 0.5725252628326416, "learning_rate": 5e-05, "loss": 0.1891, "step": 2178 }, { "epoch": 0.680352821793771, "grad_norm": 0.6048061847686768, "learning_rate": 5e-05, "loss": 0.2116, "step": 2179 }, { "epoch": 0.6806650534696745, "grad_norm": 0.6392221450805664, "learning_rate": 5e-05, "loss": 0.2168, "step": 2180 }, { "epoch": 0.680977285145578, "grad_norm": 0.6053457856178284, "learning_rate": 5e-05, "loss": 0.2108, "step": 2181 }, { "epoch": 0.6812895168214815, "grad_norm": 0.6332218050956726, "learning_rate": 5e-05, "loss": 0.2199, "step": 2182 }, { "epoch": 0.6816017484973851, "grad_norm": 0.5670251250267029, "learning_rate": 5e-05, "loss": 0.1938, "step": 2183 }, { "epoch": 0.6819139801732885, "grad_norm": 0.6318344473838806, "learning_rate": 5e-05, "loss": 0.2264, "step": 2184 }, { "epoch": 0.6822262118491921, "grad_norm": 0.6086012721061707, "learning_rate": 5e-05, "loss": 0.2184, "step": 2185 }, { "epoch": 0.6825384435250956, "grad_norm": 0.6538101434707642, "learning_rate": 5e-05, "loss": 0.2291, "step": 2186 }, { "epoch": 0.6828506752009992, "grad_norm": 0.5838341116905212, "learning_rate": 5e-05, "loss": 0.1998, "step": 2187 }, { "epoch": 0.6831629068769026, "grad_norm": 0.6101954579353333, "learning_rate": 5e-05, "loss": 0.211, "step": 2188 }, { "epoch": 0.6834751385528062, "grad_norm": 0.6454114317893982, "learning_rate": 5e-05, "loss": 0.23, "step": 2189 }, { "epoch": 0.6837873702287097, "grad_norm": 0.6478846669197083, "learning_rate": 5e-05, "loss": 0.1984, "step": 2190 }, { "epoch": 0.6840996019046133, "grad_norm": 0.6050867438316345, "learning_rate": 5e-05, "loss": 0.2025, "step": 2191 }, { "epoch": 0.6844118335805167, "grad_norm": 0.6379677653312683, "learning_rate": 5e-05, "loss": 0.2242, "step": 2192 }, { "epoch": 0.6847240652564203, "grad_norm": 0.6167853474617004, "learning_rate": 5e-05, "loss": 0.2207, "step": 2193 }, { "epoch": 0.6850362969323238, "grad_norm": 0.5750435590744019, "learning_rate": 5e-05, "loss": 0.204, "step": 2194 }, { "epoch": 0.6853485286082273, "grad_norm": 0.5893797278404236, "learning_rate": 5e-05, "loss": 0.1996, "step": 2195 }, { "epoch": 0.6856607602841308, "grad_norm": 0.5984112620353699, "learning_rate": 5e-05, "loss": 0.2201, "step": 2196 }, { "epoch": 0.6859729919600344, "grad_norm": 0.6153649091720581, "learning_rate": 5e-05, "loss": 0.2337, "step": 2197 }, { "epoch": 0.6862852236359379, "grad_norm": 0.5856079459190369, "learning_rate": 5e-05, "loss": 0.2159, "step": 2198 }, { "epoch": 0.6865974553118414, "grad_norm": 0.6192371249198914, "learning_rate": 5e-05, "loss": 0.2141, "step": 2199 }, { "epoch": 0.6869096869877449, "grad_norm": 0.6280660033226013, "learning_rate": 5e-05, "loss": 0.2164, "step": 2200 }, { "epoch": 0.6872219186636485, "grad_norm": 0.5988561511039734, "learning_rate": 5e-05, "loss": 0.2129, "step": 2201 }, { "epoch": 0.6875341503395519, "grad_norm": 0.5907313227653503, "learning_rate": 5e-05, "loss": 0.2228, "step": 2202 }, { "epoch": 0.6878463820154554, "grad_norm": 0.5740445256233215, "learning_rate": 5e-05, "loss": 0.2194, "step": 2203 }, { "epoch": 0.688158613691359, "grad_norm": 0.6229320764541626, "learning_rate": 5e-05, "loss": 0.2205, "step": 2204 }, { "epoch": 0.6884708453672626, "grad_norm": 0.628532350063324, "learning_rate": 5e-05, "loss": 0.216, "step": 2205 }, { "epoch": 0.688783077043166, "grad_norm": 0.6285635232925415, "learning_rate": 5e-05, "loss": 0.2196, "step": 2206 }, { "epoch": 0.6890953087190695, "grad_norm": 0.5926342606544495, "learning_rate": 5e-05, "loss": 0.2026, "step": 2207 }, { "epoch": 0.6894075403949731, "grad_norm": 0.7008231282234192, "learning_rate": 5e-05, "loss": 0.2264, "step": 2208 }, { "epoch": 0.6897197720708765, "grad_norm": 0.6098629832267761, "learning_rate": 5e-05, "loss": 0.2133, "step": 2209 }, { "epoch": 0.6900320037467801, "grad_norm": 0.6107459664344788, "learning_rate": 5e-05, "loss": 0.2235, "step": 2210 }, { "epoch": 0.6903442354226836, "grad_norm": 0.6402003169059753, "learning_rate": 5e-05, "loss": 0.2262, "step": 2211 }, { "epoch": 0.6906564670985872, "grad_norm": 0.6405547261238098, "learning_rate": 5e-05, "loss": 0.223, "step": 2212 }, { "epoch": 0.6909686987744906, "grad_norm": 0.5894308090209961, "learning_rate": 5e-05, "loss": 0.2124, "step": 2213 }, { "epoch": 0.6912809304503942, "grad_norm": 0.6151286959648132, "learning_rate": 5e-05, "loss": 0.2181, "step": 2214 }, { "epoch": 0.6915931621262977, "grad_norm": 0.6203864812850952, "learning_rate": 5e-05, "loss": 0.2142, "step": 2215 }, { "epoch": 0.6919053938022013, "grad_norm": 0.6080445647239685, "learning_rate": 5e-05, "loss": 0.2082, "step": 2216 }, { "epoch": 0.6922176254781047, "grad_norm": 0.6206770539283752, "learning_rate": 5e-05, "loss": 0.2189, "step": 2217 }, { "epoch": 0.6925298571540083, "grad_norm": 0.6068075299263, "learning_rate": 5e-05, "loss": 0.2084, "step": 2218 }, { "epoch": 0.6928420888299118, "grad_norm": 0.623595118522644, "learning_rate": 5e-05, "loss": 0.2318, "step": 2219 }, { "epoch": 0.6931543205058153, "grad_norm": 0.6495737433433533, "learning_rate": 5e-05, "loss": 0.2154, "step": 2220 }, { "epoch": 0.6934665521817188, "grad_norm": 0.6126834154129028, "learning_rate": 5e-05, "loss": 0.2098, "step": 2221 }, { "epoch": 0.6937787838576224, "grad_norm": 0.5879443287849426, "learning_rate": 5e-05, "loss": 0.2109, "step": 2222 }, { "epoch": 0.6940910155335259, "grad_norm": 0.580331563949585, "learning_rate": 5e-05, "loss": 0.2019, "step": 2223 }, { "epoch": 0.6944032472094294, "grad_norm": 0.6087246537208557, "learning_rate": 5e-05, "loss": 0.2023, "step": 2224 }, { "epoch": 0.6947154788853329, "grad_norm": 0.6283443570137024, "learning_rate": 5e-05, "loss": 0.2145, "step": 2225 }, { "epoch": 0.6950277105612365, "grad_norm": 0.6078490614891052, "learning_rate": 5e-05, "loss": 0.2108, "step": 2226 }, { "epoch": 0.6953399422371399, "grad_norm": 0.6059159636497498, "learning_rate": 5e-05, "loss": 0.214, "step": 2227 }, { "epoch": 0.6956521739130435, "grad_norm": 0.6760260462760925, "learning_rate": 5e-05, "loss": 0.2352, "step": 2228 }, { "epoch": 0.695964405588947, "grad_norm": 0.6189902424812317, "learning_rate": 5e-05, "loss": 0.2107, "step": 2229 }, { "epoch": 0.6962766372648506, "grad_norm": 0.6541475057601929, "learning_rate": 5e-05, "loss": 0.2222, "step": 2230 }, { "epoch": 0.696588868940754, "grad_norm": 0.6398313641548157, "learning_rate": 5e-05, "loss": 0.2278, "step": 2231 }, { "epoch": 0.6969011006166576, "grad_norm": 0.5778045654296875, "learning_rate": 5e-05, "loss": 0.2155, "step": 2232 }, { "epoch": 0.6972133322925611, "grad_norm": 0.597618579864502, "learning_rate": 5e-05, "loss": 0.2074, "step": 2233 }, { "epoch": 0.6975255639684645, "grad_norm": 0.5983875393867493, "learning_rate": 5e-05, "loss": 0.2126, "step": 2234 }, { "epoch": 0.6978377956443681, "grad_norm": 0.6208248734474182, "learning_rate": 5e-05, "loss": 0.2122, "step": 2235 }, { "epoch": 0.6981500273202716, "grad_norm": 0.58338463306427, "learning_rate": 5e-05, "loss": 0.1988, "step": 2236 }, { "epoch": 0.6984622589961752, "grad_norm": 0.5994043946266174, "learning_rate": 5e-05, "loss": 0.2199, "step": 2237 }, { "epoch": 0.6987744906720786, "grad_norm": 0.6384915113449097, "learning_rate": 5e-05, "loss": 0.2207, "step": 2238 }, { "epoch": 0.6990867223479822, "grad_norm": 0.6192854642868042, "learning_rate": 5e-05, "loss": 0.2137, "step": 2239 }, { "epoch": 0.6993989540238857, "grad_norm": 0.583294153213501, "learning_rate": 5e-05, "loss": 0.2039, "step": 2240 }, { "epoch": 0.6997111856997893, "grad_norm": 0.6644500494003296, "learning_rate": 5e-05, "loss": 0.2483, "step": 2241 }, { "epoch": 0.7000234173756927, "grad_norm": 0.5990713238716125, "learning_rate": 5e-05, "loss": 0.2155, "step": 2242 }, { "epoch": 0.7003356490515963, "grad_norm": 0.6280235648155212, "learning_rate": 5e-05, "loss": 0.2166, "step": 2243 }, { "epoch": 0.7006478807274998, "grad_norm": 0.6018584966659546, "learning_rate": 5e-05, "loss": 0.2251, "step": 2244 }, { "epoch": 0.7009601124034033, "grad_norm": 0.6105899810791016, "learning_rate": 5e-05, "loss": 0.2119, "step": 2245 }, { "epoch": 0.7012723440793068, "grad_norm": 0.6504513025283813, "learning_rate": 5e-05, "loss": 0.2366, "step": 2246 }, { "epoch": 0.7015845757552104, "grad_norm": 0.634201169013977, "learning_rate": 5e-05, "loss": 0.2072, "step": 2247 }, { "epoch": 0.7018968074311139, "grad_norm": 0.6597395539283752, "learning_rate": 5e-05, "loss": 0.2197, "step": 2248 }, { "epoch": 0.7022090391070174, "grad_norm": 0.577210009098053, "learning_rate": 5e-05, "loss": 0.1956, "step": 2249 }, { "epoch": 0.7025212707829209, "grad_norm": 0.5853981375694275, "learning_rate": 5e-05, "loss": 0.2065, "step": 2250 }, { "epoch": 0.7028335024588245, "grad_norm": 0.6278411149978638, "learning_rate": 5e-05, "loss": 0.2154, "step": 2251 }, { "epoch": 0.7031457341347279, "grad_norm": 0.6235558390617371, "learning_rate": 5e-05, "loss": 0.2182, "step": 2252 }, { "epoch": 0.7034579658106315, "grad_norm": 0.6215997934341431, "learning_rate": 5e-05, "loss": 0.209, "step": 2253 }, { "epoch": 0.703770197486535, "grad_norm": 0.654841959476471, "learning_rate": 5e-05, "loss": 0.2455, "step": 2254 }, { "epoch": 0.7040824291624386, "grad_norm": 0.6015471816062927, "learning_rate": 5e-05, "loss": 0.2121, "step": 2255 }, { "epoch": 0.704394660838342, "grad_norm": 0.5743776559829712, "learning_rate": 5e-05, "loss": 0.2103, "step": 2256 }, { "epoch": 0.7047068925142456, "grad_norm": 0.6259836554527283, "learning_rate": 5e-05, "loss": 0.2222, "step": 2257 }, { "epoch": 0.7050191241901491, "grad_norm": 0.6233925223350525, "learning_rate": 5e-05, "loss": 0.2153, "step": 2258 }, { "epoch": 0.7053313558660526, "grad_norm": 0.6261028051376343, "learning_rate": 5e-05, "loss": 0.2185, "step": 2259 }, { "epoch": 0.7056435875419561, "grad_norm": 0.6205869317054749, "learning_rate": 5e-05, "loss": 0.2105, "step": 2260 }, { "epoch": 0.7059558192178597, "grad_norm": 0.5978429913520813, "learning_rate": 5e-05, "loss": 0.2229, "step": 2261 }, { "epoch": 0.7062680508937632, "grad_norm": 0.648479163646698, "learning_rate": 5e-05, "loss": 0.2394, "step": 2262 }, { "epoch": 0.7065802825696667, "grad_norm": 0.5806421041488647, "learning_rate": 5e-05, "loss": 0.1962, "step": 2263 }, { "epoch": 0.7068925142455702, "grad_norm": 0.628277063369751, "learning_rate": 5e-05, "loss": 0.2198, "step": 2264 }, { "epoch": 0.7072047459214738, "grad_norm": 0.5740240216255188, "learning_rate": 5e-05, "loss": 0.1958, "step": 2265 }, { "epoch": 0.7075169775973773, "grad_norm": 0.5751851797103882, "learning_rate": 5e-05, "loss": 0.2049, "step": 2266 }, { "epoch": 0.7078292092732807, "grad_norm": 0.6153798699378967, "learning_rate": 5e-05, "loss": 0.2073, "step": 2267 }, { "epoch": 0.7081414409491843, "grad_norm": 0.5964927077293396, "learning_rate": 5e-05, "loss": 0.2152, "step": 2268 }, { "epoch": 0.7084536726250878, "grad_norm": 0.6232737302780151, "learning_rate": 5e-05, "loss": 0.2255, "step": 2269 }, { "epoch": 0.7087659043009913, "grad_norm": 0.6498351097106934, "learning_rate": 5e-05, "loss": 0.2111, "step": 2270 }, { "epoch": 0.7090781359768948, "grad_norm": 0.6024804711341858, "learning_rate": 5e-05, "loss": 0.2327, "step": 2271 }, { "epoch": 0.7093903676527984, "grad_norm": 0.6038530468940735, "learning_rate": 5e-05, "loss": 0.2114, "step": 2272 }, { "epoch": 0.7097025993287019, "grad_norm": 0.5950896739959717, "learning_rate": 5e-05, "loss": 0.2066, "step": 2273 }, { "epoch": 0.7100148310046054, "grad_norm": 0.6066562533378601, "learning_rate": 5e-05, "loss": 0.2154, "step": 2274 }, { "epoch": 0.7103270626805089, "grad_norm": 0.5758070349693298, "learning_rate": 5e-05, "loss": 0.2034, "step": 2275 }, { "epoch": 0.7106392943564125, "grad_norm": 0.5808334350585938, "learning_rate": 5e-05, "loss": 0.2099, "step": 2276 }, { "epoch": 0.7109515260323159, "grad_norm": 0.5930101275444031, "learning_rate": 5e-05, "loss": 0.223, "step": 2277 }, { "epoch": 0.7112637577082195, "grad_norm": 0.5941842794418335, "learning_rate": 5e-05, "loss": 0.2005, "step": 2278 }, { "epoch": 0.711575989384123, "grad_norm": 0.6179443001747131, "learning_rate": 5e-05, "loss": 0.2128, "step": 2279 }, { "epoch": 0.7118882210600266, "grad_norm": 0.60068678855896, "learning_rate": 5e-05, "loss": 0.1985, "step": 2280 }, { "epoch": 0.71220045273593, "grad_norm": 0.5940424203872681, "learning_rate": 5e-05, "loss": 0.2176, "step": 2281 }, { "epoch": 0.7125126844118336, "grad_norm": 0.5713326930999756, "learning_rate": 5e-05, "loss": 0.2058, "step": 2282 }, { "epoch": 0.7128249160877371, "grad_norm": 0.6557428240776062, "learning_rate": 5e-05, "loss": 0.2183, "step": 2283 }, { "epoch": 0.7131371477636406, "grad_norm": 0.5804157257080078, "learning_rate": 5e-05, "loss": 0.2038, "step": 2284 }, { "epoch": 0.7134493794395441, "grad_norm": 0.6486903429031372, "learning_rate": 5e-05, "loss": 0.2285, "step": 2285 }, { "epoch": 0.7137616111154477, "grad_norm": 0.6384773254394531, "learning_rate": 5e-05, "loss": 0.2228, "step": 2286 }, { "epoch": 0.7140738427913512, "grad_norm": 0.6215481162071228, "learning_rate": 5e-05, "loss": 0.2218, "step": 2287 }, { "epoch": 0.7143860744672547, "grad_norm": 0.5858855247497559, "learning_rate": 5e-05, "loss": 0.2214, "step": 2288 }, { "epoch": 0.7146983061431582, "grad_norm": 0.6034303903579712, "learning_rate": 5e-05, "loss": 0.2151, "step": 2289 }, { "epoch": 0.7150105378190618, "grad_norm": 0.6652675271034241, "learning_rate": 5e-05, "loss": 0.2394, "step": 2290 }, { "epoch": 0.7153227694949653, "grad_norm": 0.6108542084693909, "learning_rate": 5e-05, "loss": 0.2061, "step": 2291 }, { "epoch": 0.7156350011708688, "grad_norm": 0.6116336584091187, "learning_rate": 5e-05, "loss": 0.2099, "step": 2292 }, { "epoch": 0.7159472328467723, "grad_norm": 0.6500582695007324, "learning_rate": 5e-05, "loss": 0.2347, "step": 2293 }, { "epoch": 0.7162594645226759, "grad_norm": 0.6609933972358704, "learning_rate": 5e-05, "loss": 0.2257, "step": 2294 }, { "epoch": 0.7165716961985793, "grad_norm": 0.6142661571502686, "learning_rate": 5e-05, "loss": 0.2207, "step": 2295 }, { "epoch": 0.7168839278744829, "grad_norm": 0.5804210901260376, "learning_rate": 5e-05, "loss": 0.2106, "step": 2296 }, { "epoch": 0.7171961595503864, "grad_norm": 0.610206127166748, "learning_rate": 5e-05, "loss": 0.2151, "step": 2297 }, { "epoch": 0.71750839122629, "grad_norm": 0.6458045840263367, "learning_rate": 5e-05, "loss": 0.2252, "step": 2298 }, { "epoch": 0.7178206229021934, "grad_norm": 0.6218125820159912, "learning_rate": 5e-05, "loss": 0.2253, "step": 2299 }, { "epoch": 0.718132854578097, "grad_norm": 0.6235336661338806, "learning_rate": 5e-05, "loss": 0.2135, "step": 2300 }, { "epoch": 0.7184450862540005, "grad_norm": 0.617057740688324, "learning_rate": 5e-05, "loss": 0.2124, "step": 2301 }, { "epoch": 0.7187573179299039, "grad_norm": 0.6242507696151733, "learning_rate": 5e-05, "loss": 0.2053, "step": 2302 }, { "epoch": 0.7190695496058075, "grad_norm": 0.6492128968238831, "learning_rate": 5e-05, "loss": 0.2325, "step": 2303 }, { "epoch": 0.719381781281711, "grad_norm": 0.6478641033172607, "learning_rate": 5e-05, "loss": 0.2189, "step": 2304 }, { "epoch": 0.7196940129576146, "grad_norm": 0.5787356495857239, "learning_rate": 5e-05, "loss": 0.1953, "step": 2305 }, { "epoch": 0.720006244633518, "grad_norm": 0.5946206450462341, "learning_rate": 5e-05, "loss": 0.2127, "step": 2306 }, { "epoch": 0.7203184763094216, "grad_norm": 0.6297274231910706, "learning_rate": 5e-05, "loss": 0.2142, "step": 2307 }, { "epoch": 0.7206307079853251, "grad_norm": 0.5839715600013733, "learning_rate": 5e-05, "loss": 0.2265, "step": 2308 }, { "epoch": 0.7209429396612286, "grad_norm": 0.5938220024108887, "learning_rate": 5e-05, "loss": 0.199, "step": 2309 }, { "epoch": 0.7212551713371321, "grad_norm": 0.5954817533493042, "learning_rate": 5e-05, "loss": 0.206, "step": 2310 }, { "epoch": 0.7215674030130357, "grad_norm": 0.61070317029953, "learning_rate": 5e-05, "loss": 0.2121, "step": 2311 }, { "epoch": 0.7218796346889392, "grad_norm": 0.6404656767845154, "learning_rate": 5e-05, "loss": 0.2267, "step": 2312 }, { "epoch": 0.7221918663648427, "grad_norm": 0.6089431047439575, "learning_rate": 5e-05, "loss": 0.2151, "step": 2313 }, { "epoch": 0.7225040980407462, "grad_norm": 0.6086098551750183, "learning_rate": 5e-05, "loss": 0.2267, "step": 2314 }, { "epoch": 0.7228163297166498, "grad_norm": 0.6179851293563843, "learning_rate": 5e-05, "loss": 0.2115, "step": 2315 }, { "epoch": 0.7231285613925533, "grad_norm": 0.5936406254768372, "learning_rate": 5e-05, "loss": 0.2163, "step": 2316 }, { "epoch": 0.7234407930684568, "grad_norm": 0.6152855753898621, "learning_rate": 5e-05, "loss": 0.2149, "step": 2317 }, { "epoch": 0.7237530247443603, "grad_norm": 0.6112785935401917, "learning_rate": 5e-05, "loss": 0.2041, "step": 2318 }, { "epoch": 0.7240652564202639, "grad_norm": 0.6092502474784851, "learning_rate": 5e-05, "loss": 0.23, "step": 2319 }, { "epoch": 0.7243774880961673, "grad_norm": 0.6414449214935303, "learning_rate": 5e-05, "loss": 0.2305, "step": 2320 }, { "epoch": 0.7246897197720709, "grad_norm": 0.6217430830001831, "learning_rate": 5e-05, "loss": 0.2176, "step": 2321 }, { "epoch": 0.7250019514479744, "grad_norm": 0.5775009393692017, "learning_rate": 5e-05, "loss": 0.203, "step": 2322 }, { "epoch": 0.725314183123878, "grad_norm": 0.6027688384056091, "learning_rate": 5e-05, "loss": 0.2291, "step": 2323 }, { "epoch": 0.7256264147997814, "grad_norm": 0.5933821201324463, "learning_rate": 5e-05, "loss": 0.2093, "step": 2324 }, { "epoch": 0.725938646475685, "grad_norm": 0.5780848860740662, "learning_rate": 5e-05, "loss": 0.1998, "step": 2325 }, { "epoch": 0.7262508781515885, "grad_norm": 0.6016979813575745, "learning_rate": 5e-05, "loss": 0.2194, "step": 2326 }, { "epoch": 0.726563109827492, "grad_norm": 0.5793651938438416, "learning_rate": 5e-05, "loss": 0.2077, "step": 2327 }, { "epoch": 0.7268753415033955, "grad_norm": 0.6308565139770508, "learning_rate": 5e-05, "loss": 0.2163, "step": 2328 }, { "epoch": 0.727187573179299, "grad_norm": 0.6047419905662537, "learning_rate": 5e-05, "loss": 0.2227, "step": 2329 }, { "epoch": 0.7274998048552026, "grad_norm": 0.596072793006897, "learning_rate": 5e-05, "loss": 0.2038, "step": 2330 }, { "epoch": 0.727812036531106, "grad_norm": 0.5925489664077759, "learning_rate": 5e-05, "loss": 0.1997, "step": 2331 }, { "epoch": 0.7281242682070096, "grad_norm": 0.6186103820800781, "learning_rate": 5e-05, "loss": 0.2084, "step": 2332 }, { "epoch": 0.7284364998829131, "grad_norm": 0.5746688842773438, "learning_rate": 5e-05, "loss": 0.197, "step": 2333 }, { "epoch": 0.7287487315588166, "grad_norm": 0.5875864028930664, "learning_rate": 5e-05, "loss": 0.2043, "step": 2334 }, { "epoch": 0.7290609632347201, "grad_norm": 0.600278377532959, "learning_rate": 5e-05, "loss": 0.2099, "step": 2335 }, { "epoch": 0.7293731949106237, "grad_norm": 0.6186339855194092, "learning_rate": 5e-05, "loss": 0.2219, "step": 2336 }, { "epoch": 0.7296854265865272, "grad_norm": 0.6583613157272339, "learning_rate": 5e-05, "loss": 0.208, "step": 2337 }, { "epoch": 0.7299976582624307, "grad_norm": 0.6099117994308472, "learning_rate": 5e-05, "loss": 0.2115, "step": 2338 }, { "epoch": 0.7303098899383342, "grad_norm": 0.6301721930503845, "learning_rate": 5e-05, "loss": 0.2135, "step": 2339 }, { "epoch": 0.7306221216142378, "grad_norm": 0.6154086589813232, "learning_rate": 5e-05, "loss": 0.2229, "step": 2340 }, { "epoch": 0.7309343532901413, "grad_norm": 0.5831058621406555, "learning_rate": 5e-05, "loss": 0.1941, "step": 2341 }, { "epoch": 0.7312465849660448, "grad_norm": 0.605046272277832, "learning_rate": 5e-05, "loss": 0.218, "step": 2342 }, { "epoch": 0.7315588166419483, "grad_norm": 0.6538318991661072, "learning_rate": 5e-05, "loss": 0.2318, "step": 2343 }, { "epoch": 0.7318710483178519, "grad_norm": 0.5995652675628662, "learning_rate": 5e-05, "loss": 0.2139, "step": 2344 }, { "epoch": 0.7321832799937553, "grad_norm": 0.5732847452163696, "learning_rate": 5e-05, "loss": 0.2037, "step": 2345 }, { "epoch": 0.7324955116696589, "grad_norm": 0.6978337168693542, "learning_rate": 5e-05, "loss": 0.2277, "step": 2346 }, { "epoch": 0.7328077433455624, "grad_norm": 0.5997539162635803, "learning_rate": 5e-05, "loss": 0.2182, "step": 2347 }, { "epoch": 0.733119975021466, "grad_norm": 0.5788302421569824, "learning_rate": 5e-05, "loss": 0.1974, "step": 2348 }, { "epoch": 0.7334322066973694, "grad_norm": 0.6143619418144226, "learning_rate": 5e-05, "loss": 0.2156, "step": 2349 }, { "epoch": 0.733744438373273, "grad_norm": 0.6056911945343018, "learning_rate": 5e-05, "loss": 0.2119, "step": 2350 }, { "epoch": 0.7340566700491765, "grad_norm": 0.5422772169113159, "learning_rate": 5e-05, "loss": 0.1796, "step": 2351 }, { "epoch": 0.73436890172508, "grad_norm": 0.594541072845459, "learning_rate": 5e-05, "loss": 0.2302, "step": 2352 }, { "epoch": 0.7346811334009835, "grad_norm": 0.6144841313362122, "learning_rate": 5e-05, "loss": 0.2245, "step": 2353 }, { "epoch": 0.7349933650768871, "grad_norm": 0.6685988306999207, "learning_rate": 5e-05, "loss": 0.2437, "step": 2354 }, { "epoch": 0.7353055967527906, "grad_norm": 0.5728784799575806, "learning_rate": 5e-05, "loss": 0.2189, "step": 2355 }, { "epoch": 0.735617828428694, "grad_norm": 0.5882197618484497, "learning_rate": 5e-05, "loss": 0.2118, "step": 2356 }, { "epoch": 0.7359300601045976, "grad_norm": 0.5875654220581055, "learning_rate": 5e-05, "loss": 0.212, "step": 2357 }, { "epoch": 0.7362422917805012, "grad_norm": 0.6063546538352966, "learning_rate": 5e-05, "loss": 0.2067, "step": 2358 }, { "epoch": 0.7365545234564046, "grad_norm": 0.6868719458580017, "learning_rate": 5e-05, "loss": 0.2276, "step": 2359 }, { "epoch": 0.7368667551323081, "grad_norm": 0.6525372266769409, "learning_rate": 5e-05, "loss": 0.2232, "step": 2360 }, { "epoch": 0.7371789868082117, "grad_norm": 0.627196192741394, "learning_rate": 5e-05, "loss": 0.2172, "step": 2361 }, { "epoch": 0.7374912184841153, "grad_norm": 0.6193963885307312, "learning_rate": 5e-05, "loss": 0.2123, "step": 2362 }, { "epoch": 0.7378034501600187, "grad_norm": 0.6418147683143616, "learning_rate": 5e-05, "loss": 0.2139, "step": 2363 }, { "epoch": 0.7381156818359222, "grad_norm": 0.6250712871551514, "learning_rate": 5e-05, "loss": 0.2227, "step": 2364 }, { "epoch": 0.7384279135118258, "grad_norm": 0.5867894887924194, "learning_rate": 5e-05, "loss": 0.2127, "step": 2365 }, { "epoch": 0.7387401451877293, "grad_norm": 0.595957338809967, "learning_rate": 5e-05, "loss": 0.215, "step": 2366 }, { "epoch": 0.7390523768636328, "grad_norm": 0.6419033408164978, "learning_rate": 5e-05, "loss": 0.2295, "step": 2367 }, { "epoch": 0.7393646085395363, "grad_norm": 0.6479138731956482, "learning_rate": 5e-05, "loss": 0.2437, "step": 2368 }, { "epoch": 0.7396768402154399, "grad_norm": 0.5958611369132996, "learning_rate": 5e-05, "loss": 0.1973, "step": 2369 }, { "epoch": 0.7399890718913433, "grad_norm": 0.5970001220703125, "learning_rate": 5e-05, "loss": 0.2101, "step": 2370 }, { "epoch": 0.7403013035672469, "grad_norm": 0.6147600412368774, "learning_rate": 5e-05, "loss": 0.2244, "step": 2371 }, { "epoch": 0.7406135352431504, "grad_norm": 0.5810714364051819, "learning_rate": 5e-05, "loss": 0.2015, "step": 2372 }, { "epoch": 0.740925766919054, "grad_norm": 0.5963602066040039, "learning_rate": 5e-05, "loss": 0.227, "step": 2373 }, { "epoch": 0.7412379985949574, "grad_norm": 0.6316869854927063, "learning_rate": 5e-05, "loss": 0.2371, "step": 2374 }, { "epoch": 0.741550230270861, "grad_norm": 0.59905606508255, "learning_rate": 5e-05, "loss": 0.213, "step": 2375 }, { "epoch": 0.7418624619467645, "grad_norm": 0.6070786714553833, "learning_rate": 5e-05, "loss": 0.2022, "step": 2376 }, { "epoch": 0.742174693622668, "grad_norm": 0.5979219675064087, "learning_rate": 5e-05, "loss": 0.2198, "step": 2377 }, { "epoch": 0.7424869252985715, "grad_norm": 0.6180666089057922, "learning_rate": 5e-05, "loss": 0.2222, "step": 2378 }, { "epoch": 0.7427991569744751, "grad_norm": 0.6268547773361206, "learning_rate": 5e-05, "loss": 0.2168, "step": 2379 }, { "epoch": 0.7431113886503786, "grad_norm": 0.626478910446167, "learning_rate": 5e-05, "loss": 0.2181, "step": 2380 }, { "epoch": 0.7434236203262821, "grad_norm": 0.6011648178100586, "learning_rate": 5e-05, "loss": 0.2081, "step": 2381 }, { "epoch": 0.7437358520021856, "grad_norm": 0.6525745987892151, "learning_rate": 5e-05, "loss": 0.2276, "step": 2382 }, { "epoch": 0.7440480836780892, "grad_norm": 0.6171519160270691, "learning_rate": 5e-05, "loss": 0.2193, "step": 2383 }, { "epoch": 0.7443603153539926, "grad_norm": 0.5892175436019897, "learning_rate": 5e-05, "loss": 0.2031, "step": 2384 }, { "epoch": 0.7446725470298962, "grad_norm": 0.6111161112785339, "learning_rate": 5e-05, "loss": 0.2204, "step": 2385 }, { "epoch": 0.7449847787057997, "grad_norm": 0.6434178352355957, "learning_rate": 5e-05, "loss": 0.2254, "step": 2386 }, { "epoch": 0.7452970103817033, "grad_norm": 0.6107887029647827, "learning_rate": 5e-05, "loss": 0.2318, "step": 2387 }, { "epoch": 0.7456092420576067, "grad_norm": 0.6169838905334473, "learning_rate": 5e-05, "loss": 0.2209, "step": 2388 }, { "epoch": 0.7459214737335103, "grad_norm": 0.5732512474060059, "learning_rate": 5e-05, "loss": 0.2055, "step": 2389 }, { "epoch": 0.7462337054094138, "grad_norm": 0.6154590249061584, "learning_rate": 5e-05, "loss": 0.2208, "step": 2390 }, { "epoch": 0.7465459370853174, "grad_norm": 0.6191912889480591, "learning_rate": 5e-05, "loss": 0.2227, "step": 2391 }, { "epoch": 0.7468581687612208, "grad_norm": 0.6031867861747742, "learning_rate": 5e-05, "loss": 0.2132, "step": 2392 }, { "epoch": 0.7471704004371243, "grad_norm": 0.6119937896728516, "learning_rate": 5e-05, "loss": 0.2129, "step": 2393 }, { "epoch": 0.7474826321130279, "grad_norm": 0.6306155920028687, "learning_rate": 5e-05, "loss": 0.2392, "step": 2394 }, { "epoch": 0.7477948637889313, "grad_norm": 0.6248051524162292, "learning_rate": 5e-05, "loss": 0.2277, "step": 2395 }, { "epoch": 0.7481070954648349, "grad_norm": 0.6215419173240662, "learning_rate": 5e-05, "loss": 0.2346, "step": 2396 }, { "epoch": 0.7484193271407384, "grad_norm": 0.6445502042770386, "learning_rate": 5e-05, "loss": 0.2393, "step": 2397 }, { "epoch": 0.748731558816642, "grad_norm": 0.5992465615272522, "learning_rate": 5e-05, "loss": 0.2242, "step": 2398 }, { "epoch": 0.7490437904925454, "grad_norm": 0.6393172144889832, "learning_rate": 5e-05, "loss": 0.2274, "step": 2399 }, { "epoch": 0.749356022168449, "grad_norm": 0.6114733219146729, "learning_rate": 5e-05, "loss": 0.2176, "step": 2400 }, { "epoch": 0.7496682538443525, "grad_norm": 0.6275477409362793, "learning_rate": 5e-05, "loss": 0.217, "step": 2401 }, { "epoch": 0.749980485520256, "grad_norm": 0.5903044939041138, "learning_rate": 5e-05, "loss": 0.2124, "step": 2402 }, { "epoch": 0.7502927171961595, "grad_norm": 0.6355380415916443, "learning_rate": 5e-05, "loss": 0.2115, "step": 2403 }, { "epoch": 0.7506049488720631, "grad_norm": 0.591425359249115, "learning_rate": 5e-05, "loss": 0.2028, "step": 2404 }, { "epoch": 0.7509171805479666, "grad_norm": 0.5840221047401428, "learning_rate": 5e-05, "loss": 0.2299, "step": 2405 }, { "epoch": 0.7512294122238701, "grad_norm": 0.6043943166732788, "learning_rate": 5e-05, "loss": 0.2074, "step": 2406 }, { "epoch": 0.7515416438997736, "grad_norm": 0.6141829490661621, "learning_rate": 5e-05, "loss": 0.2066, "step": 2407 }, { "epoch": 0.7518538755756772, "grad_norm": 0.6153725385665894, "learning_rate": 5e-05, "loss": 0.2208, "step": 2408 }, { "epoch": 0.7521661072515806, "grad_norm": 0.6160703301429749, "learning_rate": 5e-05, "loss": 0.1998, "step": 2409 }, { "epoch": 0.7524783389274842, "grad_norm": 0.6074708700180054, "learning_rate": 5e-05, "loss": 0.218, "step": 2410 }, { "epoch": 0.7527905706033877, "grad_norm": 0.5938888192176819, "learning_rate": 5e-05, "loss": 0.2171, "step": 2411 }, { "epoch": 0.7531028022792913, "grad_norm": 0.6195661425590515, "learning_rate": 5e-05, "loss": 0.2246, "step": 2412 }, { "epoch": 0.7534150339551947, "grad_norm": 0.6250846982002258, "learning_rate": 5e-05, "loss": 0.2209, "step": 2413 }, { "epoch": 0.7537272656310983, "grad_norm": 0.6297953724861145, "learning_rate": 5e-05, "loss": 0.2253, "step": 2414 }, { "epoch": 0.7540394973070018, "grad_norm": 0.6015211343765259, "learning_rate": 5e-05, "loss": 0.218, "step": 2415 }, { "epoch": 0.7543517289829054, "grad_norm": 0.6154037714004517, "learning_rate": 5e-05, "loss": 0.226, "step": 2416 }, { "epoch": 0.7546639606588088, "grad_norm": 0.5989086627960205, "learning_rate": 5e-05, "loss": 0.2147, "step": 2417 }, { "epoch": 0.7549761923347124, "grad_norm": 0.5896382927894592, "learning_rate": 5e-05, "loss": 0.2193, "step": 2418 }, { "epoch": 0.7552884240106159, "grad_norm": 0.6441901326179504, "learning_rate": 5e-05, "loss": 0.2208, "step": 2419 }, { "epoch": 0.7556006556865194, "grad_norm": 0.6258741021156311, "learning_rate": 5e-05, "loss": 0.2225, "step": 2420 }, { "epoch": 0.7559128873624229, "grad_norm": 0.6423928737640381, "learning_rate": 5e-05, "loss": 0.2192, "step": 2421 }, { "epoch": 0.7562251190383265, "grad_norm": 0.6348696351051331, "learning_rate": 5e-05, "loss": 0.2226, "step": 2422 }, { "epoch": 0.75653735071423, "grad_norm": 0.5988432168960571, "learning_rate": 5e-05, "loss": 0.2143, "step": 2423 }, { "epoch": 0.7568495823901334, "grad_norm": 0.6266500949859619, "learning_rate": 5e-05, "loss": 0.2173, "step": 2424 }, { "epoch": 0.757161814066037, "grad_norm": 0.5914242267608643, "learning_rate": 5e-05, "loss": 0.2282, "step": 2425 }, { "epoch": 0.7574740457419405, "grad_norm": 0.6349212527275085, "learning_rate": 5e-05, "loss": 0.2234, "step": 2426 }, { "epoch": 0.757786277417844, "grad_norm": 0.6281952857971191, "learning_rate": 5e-05, "loss": 0.2204, "step": 2427 }, { "epoch": 0.7580985090937475, "grad_norm": 0.6225721836090088, "learning_rate": 5e-05, "loss": 0.2187, "step": 2428 }, { "epoch": 0.7584107407696511, "grad_norm": 0.6016956567764282, "learning_rate": 5e-05, "loss": 0.2237, "step": 2429 }, { "epoch": 0.7587229724455546, "grad_norm": 0.6077722311019897, "learning_rate": 5e-05, "loss": 0.2209, "step": 2430 }, { "epoch": 0.7590352041214581, "grad_norm": 0.6317979693412781, "learning_rate": 5e-05, "loss": 0.231, "step": 2431 }, { "epoch": 0.7593474357973616, "grad_norm": 0.6297626495361328, "learning_rate": 5e-05, "loss": 0.2205, "step": 2432 }, { "epoch": 0.7596596674732652, "grad_norm": 0.6198453307151794, "learning_rate": 5e-05, "loss": 0.2129, "step": 2433 }, { "epoch": 0.7599718991491686, "grad_norm": 0.6145687699317932, "learning_rate": 5e-05, "loss": 0.2275, "step": 2434 }, { "epoch": 0.7602841308250722, "grad_norm": 0.572542130947113, "learning_rate": 5e-05, "loss": 0.1962, "step": 2435 }, { "epoch": 0.7605963625009757, "grad_norm": 0.588507354259491, "learning_rate": 5e-05, "loss": 0.2214, "step": 2436 }, { "epoch": 0.7609085941768793, "grad_norm": 0.5736903548240662, "learning_rate": 5e-05, "loss": 0.1973, "step": 2437 }, { "epoch": 0.7612208258527827, "grad_norm": 0.6076328158378601, "learning_rate": 5e-05, "loss": 0.2, "step": 2438 }, { "epoch": 0.7615330575286863, "grad_norm": 0.5867302417755127, "learning_rate": 5e-05, "loss": 0.1992, "step": 2439 }, { "epoch": 0.7618452892045898, "grad_norm": 0.6084708571434021, "learning_rate": 5e-05, "loss": 0.2137, "step": 2440 }, { "epoch": 0.7621575208804934, "grad_norm": 0.6018153429031372, "learning_rate": 5e-05, "loss": 0.2049, "step": 2441 }, { "epoch": 0.7624697525563968, "grad_norm": 0.6121959686279297, "learning_rate": 5e-05, "loss": 0.2034, "step": 2442 }, { "epoch": 0.7627819842323004, "grad_norm": 0.5796517133712769, "learning_rate": 5e-05, "loss": 0.2143, "step": 2443 }, { "epoch": 0.7630942159082039, "grad_norm": 0.5907673835754395, "learning_rate": 5e-05, "loss": 0.2089, "step": 2444 }, { "epoch": 0.7634064475841074, "grad_norm": 0.6215242743492126, "learning_rate": 5e-05, "loss": 0.2256, "step": 2445 }, { "epoch": 0.7637186792600109, "grad_norm": 0.6167763471603394, "learning_rate": 5e-05, "loss": 0.2106, "step": 2446 }, { "epoch": 0.7640309109359145, "grad_norm": 0.5905152559280396, "learning_rate": 5e-05, "loss": 0.2051, "step": 2447 }, { "epoch": 0.764343142611818, "grad_norm": 0.6442142724990845, "learning_rate": 5e-05, "loss": 0.2216, "step": 2448 }, { "epoch": 0.7646553742877215, "grad_norm": 0.6183902025222778, "learning_rate": 5e-05, "loss": 0.2247, "step": 2449 }, { "epoch": 0.764967605963625, "grad_norm": 0.6088964939117432, "learning_rate": 5e-05, "loss": 0.2025, "step": 2450 }, { "epoch": 0.7652798376395286, "grad_norm": 0.6168743968009949, "learning_rate": 5e-05, "loss": 0.2105, "step": 2451 }, { "epoch": 0.765592069315432, "grad_norm": 0.6154438853263855, "learning_rate": 5e-05, "loss": 0.2199, "step": 2452 }, { "epoch": 0.7659043009913356, "grad_norm": 0.5921667814254761, "learning_rate": 5e-05, "loss": 0.2104, "step": 2453 }, { "epoch": 0.7662165326672391, "grad_norm": 0.6096763014793396, "learning_rate": 5e-05, "loss": 0.2161, "step": 2454 }, { "epoch": 0.7665287643431427, "grad_norm": 0.582703709602356, "learning_rate": 5e-05, "loss": 0.2125, "step": 2455 }, { "epoch": 0.7668409960190461, "grad_norm": 0.6043486595153809, "learning_rate": 5e-05, "loss": 0.2133, "step": 2456 }, { "epoch": 0.7671532276949496, "grad_norm": 0.6100246906280518, "learning_rate": 5e-05, "loss": 0.2096, "step": 2457 }, { "epoch": 0.7674654593708532, "grad_norm": 0.6073058843612671, "learning_rate": 5e-05, "loss": 0.2221, "step": 2458 }, { "epoch": 0.7677776910467566, "grad_norm": 0.6029211282730103, "learning_rate": 5e-05, "loss": 0.2183, "step": 2459 }, { "epoch": 0.7680899227226602, "grad_norm": 0.5919411182403564, "learning_rate": 5e-05, "loss": 0.2197, "step": 2460 }, { "epoch": 0.7684021543985637, "grad_norm": 0.5814952254295349, "learning_rate": 5e-05, "loss": 0.2064, "step": 2461 }, { "epoch": 0.7687143860744673, "grad_norm": 0.6025931239128113, "learning_rate": 5e-05, "loss": 0.207, "step": 2462 }, { "epoch": 0.7690266177503707, "grad_norm": 0.600922703742981, "learning_rate": 5e-05, "loss": 0.238, "step": 2463 }, { "epoch": 0.7693388494262743, "grad_norm": 0.6222965717315674, "learning_rate": 5e-05, "loss": 0.2101, "step": 2464 }, { "epoch": 0.7696510811021778, "grad_norm": 0.6121858358383179, "learning_rate": 5e-05, "loss": 0.2172, "step": 2465 }, { "epoch": 0.7699633127780814, "grad_norm": 0.6445747017860413, "learning_rate": 5e-05, "loss": 0.2439, "step": 2466 }, { "epoch": 0.7702755444539848, "grad_norm": 0.6203593015670776, "learning_rate": 5e-05, "loss": 0.2251, "step": 2467 }, { "epoch": 0.7705877761298884, "grad_norm": 0.56023108959198, "learning_rate": 5e-05, "loss": 0.1956, "step": 2468 }, { "epoch": 0.7709000078057919, "grad_norm": 0.650800883769989, "learning_rate": 5e-05, "loss": 0.2365, "step": 2469 }, { "epoch": 0.7712122394816954, "grad_norm": 0.5721884965896606, "learning_rate": 5e-05, "loss": 0.1936, "step": 2470 }, { "epoch": 0.7715244711575989, "grad_norm": 0.6425117254257202, "learning_rate": 5e-05, "loss": 0.2148, "step": 2471 }, { "epoch": 0.7718367028335025, "grad_norm": 0.5894032716751099, "learning_rate": 5e-05, "loss": 0.2008, "step": 2472 }, { "epoch": 0.772148934509406, "grad_norm": 0.6102375388145447, "learning_rate": 5e-05, "loss": 0.2047, "step": 2473 }, { "epoch": 0.7724611661853095, "grad_norm": 0.5882739424705505, "learning_rate": 5e-05, "loss": 0.2212, "step": 2474 }, { "epoch": 0.772773397861213, "grad_norm": 0.6014469265937805, "learning_rate": 5e-05, "loss": 0.2196, "step": 2475 }, { "epoch": 0.7730856295371166, "grad_norm": 0.5979114770889282, "learning_rate": 5e-05, "loss": 0.2259, "step": 2476 }, { "epoch": 0.77339786121302, "grad_norm": 0.5785643458366394, "learning_rate": 5e-05, "loss": 0.2042, "step": 2477 }, { "epoch": 0.7737100928889236, "grad_norm": 0.6093169450759888, "learning_rate": 5e-05, "loss": 0.2308, "step": 2478 }, { "epoch": 0.7740223245648271, "grad_norm": 0.5691248178482056, "learning_rate": 5e-05, "loss": 0.2084, "step": 2479 }, { "epoch": 0.7743345562407307, "grad_norm": 0.5883845090866089, "learning_rate": 5e-05, "loss": 0.211, "step": 2480 }, { "epoch": 0.7746467879166341, "grad_norm": 0.6220720410346985, "learning_rate": 5e-05, "loss": 0.2439, "step": 2481 }, { "epoch": 0.7749590195925377, "grad_norm": 0.6012817621231079, "learning_rate": 5e-05, "loss": 0.2118, "step": 2482 }, { "epoch": 0.7752712512684412, "grad_norm": 0.5856543183326721, "learning_rate": 5e-05, "loss": 0.2023, "step": 2483 }, { "epoch": 0.7755834829443446, "grad_norm": 0.6188816428184509, "learning_rate": 5e-05, "loss": 0.2363, "step": 2484 }, { "epoch": 0.7758957146202482, "grad_norm": 0.5774475932121277, "learning_rate": 5e-05, "loss": 0.2166, "step": 2485 }, { "epoch": 0.7762079462961518, "grad_norm": 0.572411298751831, "learning_rate": 5e-05, "loss": 0.2051, "step": 2486 }, { "epoch": 0.7765201779720553, "grad_norm": 0.5850803256034851, "learning_rate": 5e-05, "loss": 0.2155, "step": 2487 }, { "epoch": 0.7768324096479587, "grad_norm": 0.635204553604126, "learning_rate": 5e-05, "loss": 0.2295, "step": 2488 }, { "epoch": 0.7771446413238623, "grad_norm": 0.6239198446273804, "learning_rate": 5e-05, "loss": 0.2104, "step": 2489 }, { "epoch": 0.7774568729997658, "grad_norm": 0.6133711338043213, "learning_rate": 5e-05, "loss": 0.2225, "step": 2490 }, { "epoch": 0.7777691046756694, "grad_norm": 0.6175106763839722, "learning_rate": 5e-05, "loss": 0.2098, "step": 2491 }, { "epoch": 0.7780813363515728, "grad_norm": 0.6209247708320618, "learning_rate": 5e-05, "loss": 0.2266, "step": 2492 }, { "epoch": 0.7783935680274764, "grad_norm": 0.643045961856842, "learning_rate": 5e-05, "loss": 0.2223, "step": 2493 }, { "epoch": 0.7787057997033799, "grad_norm": 0.6412310600280762, "learning_rate": 5e-05, "loss": 0.2177, "step": 2494 }, { "epoch": 0.7790180313792834, "grad_norm": 0.577008843421936, "learning_rate": 5e-05, "loss": 0.1992, "step": 2495 }, { "epoch": 0.7793302630551869, "grad_norm": 0.5891995429992676, "learning_rate": 5e-05, "loss": 0.2179, "step": 2496 }, { "epoch": 0.7796424947310905, "grad_norm": 0.6372776627540588, "learning_rate": 5e-05, "loss": 0.2273, "step": 2497 }, { "epoch": 0.779954726406994, "grad_norm": 0.594112753868103, "learning_rate": 5e-05, "loss": 0.2052, "step": 2498 }, { "epoch": 0.7802669580828975, "grad_norm": 0.6016589403152466, "learning_rate": 5e-05, "loss": 0.2156, "step": 2499 }, { "epoch": 0.780579189758801, "grad_norm": 0.6037912368774414, "learning_rate": 5e-05, "loss": 0.2206, "step": 2500 }, { "epoch": 0.7808914214347046, "grad_norm": 0.6220571994781494, "learning_rate": 5e-05, "loss": 0.2204, "step": 2501 }, { "epoch": 0.781203653110608, "grad_norm": 0.5794981122016907, "learning_rate": 5e-05, "loss": 0.2134, "step": 2502 }, { "epoch": 0.7815158847865116, "grad_norm": 0.5719797611236572, "learning_rate": 5e-05, "loss": 0.1932, "step": 2503 }, { "epoch": 0.7818281164624151, "grad_norm": 0.5835011005401611, "learning_rate": 5e-05, "loss": 0.205, "step": 2504 }, { "epoch": 0.7821403481383187, "grad_norm": 0.6026967763900757, "learning_rate": 5e-05, "loss": 0.205, "step": 2505 }, { "epoch": 0.7824525798142221, "grad_norm": 0.5897008180618286, "learning_rate": 5e-05, "loss": 0.22, "step": 2506 }, { "epoch": 0.7827648114901257, "grad_norm": 0.6187354922294617, "learning_rate": 5e-05, "loss": 0.2092, "step": 2507 }, { "epoch": 0.7830770431660292, "grad_norm": 0.5783199071884155, "learning_rate": 5e-05, "loss": 0.2044, "step": 2508 }, { "epoch": 0.7833892748419327, "grad_norm": 0.6288344264030457, "learning_rate": 5e-05, "loss": 0.2234, "step": 2509 }, { "epoch": 0.7837015065178362, "grad_norm": 0.5817195773124695, "learning_rate": 5e-05, "loss": 0.2141, "step": 2510 }, { "epoch": 0.7840137381937398, "grad_norm": 0.6150068044662476, "learning_rate": 5e-05, "loss": 0.2161, "step": 2511 }, { "epoch": 0.7843259698696433, "grad_norm": 0.5737404823303223, "learning_rate": 5e-05, "loss": 0.2042, "step": 2512 }, { "epoch": 0.7846382015455468, "grad_norm": 0.613061249256134, "learning_rate": 5e-05, "loss": 0.2334, "step": 2513 }, { "epoch": 0.7849504332214503, "grad_norm": 0.610420823097229, "learning_rate": 5e-05, "loss": 0.2143, "step": 2514 }, { "epoch": 0.7852626648973539, "grad_norm": 0.6238245368003845, "learning_rate": 5e-05, "loss": 0.219, "step": 2515 }, { "epoch": 0.7855748965732574, "grad_norm": 0.5807271003723145, "learning_rate": 5e-05, "loss": 0.211, "step": 2516 }, { "epoch": 0.7858871282491608, "grad_norm": 0.6034393906593323, "learning_rate": 5e-05, "loss": 0.2046, "step": 2517 }, { "epoch": 0.7861993599250644, "grad_norm": 0.6599997878074646, "learning_rate": 5e-05, "loss": 0.2556, "step": 2518 }, { "epoch": 0.786511591600968, "grad_norm": 0.6101549863815308, "learning_rate": 5e-05, "loss": 0.2193, "step": 2519 }, { "epoch": 0.7868238232768714, "grad_norm": 0.6227661371231079, "learning_rate": 5e-05, "loss": 0.2262, "step": 2520 }, { "epoch": 0.7871360549527749, "grad_norm": 0.5831648111343384, "learning_rate": 5e-05, "loss": 0.2045, "step": 2521 }, { "epoch": 0.7874482866286785, "grad_norm": 0.5859748721122742, "learning_rate": 5e-05, "loss": 0.2056, "step": 2522 }, { "epoch": 0.787760518304582, "grad_norm": 0.5945742726325989, "learning_rate": 5e-05, "loss": 0.2015, "step": 2523 }, { "epoch": 0.7880727499804855, "grad_norm": 0.5958227515220642, "learning_rate": 5e-05, "loss": 0.2205, "step": 2524 }, { "epoch": 0.788384981656389, "grad_norm": 0.6139782071113586, "learning_rate": 5e-05, "loss": 0.2251, "step": 2525 }, { "epoch": 0.7886972133322926, "grad_norm": 0.6383568048477173, "learning_rate": 5e-05, "loss": 0.218, "step": 2526 }, { "epoch": 0.789009445008196, "grad_norm": 0.6244081258773804, "learning_rate": 5e-05, "loss": 0.2185, "step": 2527 }, { "epoch": 0.7893216766840996, "grad_norm": 0.6080593466758728, "learning_rate": 5e-05, "loss": 0.2106, "step": 2528 }, { "epoch": 0.7896339083600031, "grad_norm": 0.6054705381393433, "learning_rate": 5e-05, "loss": 0.2231, "step": 2529 }, { "epoch": 0.7899461400359067, "grad_norm": 0.6128535270690918, "learning_rate": 5e-05, "loss": 0.2231, "step": 2530 }, { "epoch": 0.7902583717118101, "grad_norm": 0.5882001519203186, "learning_rate": 5e-05, "loss": 0.2156, "step": 2531 }, { "epoch": 0.7905706033877137, "grad_norm": 0.605478048324585, "learning_rate": 5e-05, "loss": 0.217, "step": 2532 }, { "epoch": 0.7908828350636172, "grad_norm": 0.6031370162963867, "learning_rate": 5e-05, "loss": 0.2186, "step": 2533 }, { "epoch": 0.7911950667395207, "grad_norm": 0.6225010752677917, "learning_rate": 5e-05, "loss": 0.2363, "step": 2534 }, { "epoch": 0.7915072984154242, "grad_norm": 0.5717655420303345, "learning_rate": 5e-05, "loss": 0.2041, "step": 2535 }, { "epoch": 0.7918195300913278, "grad_norm": 0.6221069097518921, "learning_rate": 5e-05, "loss": 0.2143, "step": 2536 }, { "epoch": 0.7921317617672313, "grad_norm": 0.618435800075531, "learning_rate": 5e-05, "loss": 0.2299, "step": 2537 }, { "epoch": 0.7924439934431348, "grad_norm": 0.6245702505111694, "learning_rate": 5e-05, "loss": 0.2189, "step": 2538 }, { "epoch": 0.7927562251190383, "grad_norm": 0.5798971056938171, "learning_rate": 5e-05, "loss": 0.2046, "step": 2539 }, { "epoch": 0.7930684567949419, "grad_norm": 0.6458972692489624, "learning_rate": 5e-05, "loss": 0.2358, "step": 2540 }, { "epoch": 0.7933806884708454, "grad_norm": 0.5916386246681213, "learning_rate": 5e-05, "loss": 0.2115, "step": 2541 }, { "epoch": 0.7936929201467489, "grad_norm": 0.6465575695037842, "learning_rate": 5e-05, "loss": 0.2117, "step": 2542 }, { "epoch": 0.7940051518226524, "grad_norm": 0.6281757354736328, "learning_rate": 5e-05, "loss": 0.2109, "step": 2543 }, { "epoch": 0.794317383498556, "grad_norm": 0.5990691781044006, "learning_rate": 5e-05, "loss": 0.2149, "step": 2544 }, { "epoch": 0.7946296151744594, "grad_norm": 0.5814017057418823, "learning_rate": 5e-05, "loss": 0.1981, "step": 2545 }, { "epoch": 0.794941846850363, "grad_norm": 0.6080833673477173, "learning_rate": 5e-05, "loss": 0.2126, "step": 2546 }, { "epoch": 0.7952540785262665, "grad_norm": 0.5749900937080383, "learning_rate": 5e-05, "loss": 0.1946, "step": 2547 }, { "epoch": 0.7955663102021701, "grad_norm": 0.5705317854881287, "learning_rate": 5e-05, "loss": 0.2106, "step": 2548 }, { "epoch": 0.7958785418780735, "grad_norm": 0.5891273617744446, "learning_rate": 5e-05, "loss": 0.2043, "step": 2549 }, { "epoch": 0.796190773553977, "grad_norm": 0.6200094819068909, "learning_rate": 5e-05, "loss": 0.2133, "step": 2550 }, { "epoch": 0.7965030052298806, "grad_norm": 0.623833417892456, "learning_rate": 5e-05, "loss": 0.217, "step": 2551 }, { "epoch": 0.796815236905784, "grad_norm": 0.61124187707901, "learning_rate": 5e-05, "loss": 0.2092, "step": 2552 }, { "epoch": 0.7971274685816876, "grad_norm": 0.6127198934555054, "learning_rate": 5e-05, "loss": 0.2298, "step": 2553 }, { "epoch": 0.7974397002575911, "grad_norm": 0.6329814791679382, "learning_rate": 5e-05, "loss": 0.2214, "step": 2554 }, { "epoch": 0.7977519319334947, "grad_norm": 0.5994768738746643, "learning_rate": 5e-05, "loss": 0.2223, "step": 2555 }, { "epoch": 0.7980641636093981, "grad_norm": 0.5981721878051758, "learning_rate": 5e-05, "loss": 0.2144, "step": 2556 }, { "epoch": 0.7983763952853017, "grad_norm": 0.5949862599372864, "learning_rate": 5e-05, "loss": 0.2099, "step": 2557 }, { "epoch": 0.7986886269612052, "grad_norm": 0.6233556866645813, "learning_rate": 5e-05, "loss": 0.2179, "step": 2558 }, { "epoch": 0.7990008586371087, "grad_norm": 0.5818814039230347, "learning_rate": 5e-05, "loss": 0.204, "step": 2559 }, { "epoch": 0.7993130903130122, "grad_norm": 0.6183021068572998, "learning_rate": 5e-05, "loss": 0.202, "step": 2560 }, { "epoch": 0.7996253219889158, "grad_norm": 0.5954915881156921, "learning_rate": 5e-05, "loss": 0.2267, "step": 2561 }, { "epoch": 0.7999375536648193, "grad_norm": 0.6108293533325195, "learning_rate": 5e-05, "loss": 0.2137, "step": 2562 }, { "epoch": 0.8002497853407228, "grad_norm": 0.6279028654098511, "learning_rate": 5e-05, "loss": 0.2226, "step": 2563 }, { "epoch": 0.8005620170166263, "grad_norm": 0.6071059703826904, "learning_rate": 5e-05, "loss": 0.2065, "step": 2564 }, { "epoch": 0.8008742486925299, "grad_norm": 0.5889386534690857, "learning_rate": 5e-05, "loss": 0.2111, "step": 2565 }, { "epoch": 0.8011864803684334, "grad_norm": 0.5524096488952637, "learning_rate": 5e-05, "loss": 0.1936, "step": 2566 }, { "epoch": 0.8014987120443369, "grad_norm": 0.6312670707702637, "learning_rate": 5e-05, "loss": 0.2241, "step": 2567 }, { "epoch": 0.8018109437202404, "grad_norm": 0.5993804931640625, "learning_rate": 5e-05, "loss": 0.222, "step": 2568 }, { "epoch": 0.802123175396144, "grad_norm": 0.5973406434059143, "learning_rate": 5e-05, "loss": 0.218, "step": 2569 }, { "epoch": 0.8024354070720474, "grad_norm": 0.5848402380943298, "learning_rate": 5e-05, "loss": 0.1898, "step": 2570 }, { "epoch": 0.802747638747951, "grad_norm": 0.6168309450149536, "learning_rate": 5e-05, "loss": 0.207, "step": 2571 }, { "epoch": 0.8030598704238545, "grad_norm": 0.5905262231826782, "learning_rate": 5e-05, "loss": 0.2152, "step": 2572 }, { "epoch": 0.8033721020997581, "grad_norm": 0.6026282906532288, "learning_rate": 5e-05, "loss": 0.2205, "step": 2573 }, { "epoch": 0.8036843337756615, "grad_norm": 0.6093233823776245, "learning_rate": 5e-05, "loss": 0.2002, "step": 2574 }, { "epoch": 0.8039965654515651, "grad_norm": 0.5939178466796875, "learning_rate": 5e-05, "loss": 0.2071, "step": 2575 }, { "epoch": 0.8043087971274686, "grad_norm": 0.6099826693534851, "learning_rate": 5e-05, "loss": 0.2294, "step": 2576 }, { "epoch": 0.804621028803372, "grad_norm": 0.5971570611000061, "learning_rate": 5e-05, "loss": 0.2127, "step": 2577 }, { "epoch": 0.8049332604792756, "grad_norm": 0.5658718347549438, "learning_rate": 5e-05, "loss": 0.2088, "step": 2578 }, { "epoch": 0.8052454921551792, "grad_norm": 0.6139369010925293, "learning_rate": 5e-05, "loss": 0.2205, "step": 2579 }, { "epoch": 0.8055577238310827, "grad_norm": 0.6030564904212952, "learning_rate": 5e-05, "loss": 0.211, "step": 2580 }, { "epoch": 0.8058699555069861, "grad_norm": 0.6036209464073181, "learning_rate": 5e-05, "loss": 0.2167, "step": 2581 }, { "epoch": 0.8061821871828897, "grad_norm": 0.5663734674453735, "learning_rate": 5e-05, "loss": 0.198, "step": 2582 }, { "epoch": 0.8064944188587932, "grad_norm": 0.5919235944747925, "learning_rate": 5e-05, "loss": 0.2195, "step": 2583 }, { "epoch": 0.8068066505346967, "grad_norm": 0.6190772652626038, "learning_rate": 5e-05, "loss": 0.2179, "step": 2584 }, { "epoch": 0.8071188822106002, "grad_norm": 0.5796468257904053, "learning_rate": 5e-05, "loss": 0.1899, "step": 2585 }, { "epoch": 0.8074311138865038, "grad_norm": 0.614510715007782, "learning_rate": 5e-05, "loss": 0.202, "step": 2586 }, { "epoch": 0.8077433455624073, "grad_norm": 0.6143738031387329, "learning_rate": 5e-05, "loss": 0.2109, "step": 2587 }, { "epoch": 0.8080555772383108, "grad_norm": 0.6160396337509155, "learning_rate": 5e-05, "loss": 0.2293, "step": 2588 }, { "epoch": 0.8083678089142143, "grad_norm": 0.5736902356147766, "learning_rate": 5e-05, "loss": 0.2129, "step": 2589 }, { "epoch": 0.8086800405901179, "grad_norm": 0.6081119775772095, "learning_rate": 5e-05, "loss": 0.2259, "step": 2590 }, { "epoch": 0.8089922722660214, "grad_norm": 0.6018806099891663, "learning_rate": 5e-05, "loss": 0.2171, "step": 2591 }, { "epoch": 0.8093045039419249, "grad_norm": 0.5774819254875183, "learning_rate": 5e-05, "loss": 0.2045, "step": 2592 }, { "epoch": 0.8096167356178284, "grad_norm": 0.5613961815834045, "learning_rate": 5e-05, "loss": 0.2152, "step": 2593 }, { "epoch": 0.809928967293732, "grad_norm": 0.6043633222579956, "learning_rate": 5e-05, "loss": 0.2173, "step": 2594 }, { "epoch": 0.8102411989696354, "grad_norm": 0.6077883839607239, "learning_rate": 5e-05, "loss": 0.2109, "step": 2595 }, { "epoch": 0.810553430645539, "grad_norm": 0.6365798115730286, "learning_rate": 5e-05, "loss": 0.214, "step": 2596 }, { "epoch": 0.8108656623214425, "grad_norm": 0.5608327388763428, "learning_rate": 5e-05, "loss": 0.2032, "step": 2597 }, { "epoch": 0.8111778939973461, "grad_norm": 0.6221548318862915, "learning_rate": 5e-05, "loss": 0.2266, "step": 2598 }, { "epoch": 0.8114901256732495, "grad_norm": 0.563684344291687, "learning_rate": 5e-05, "loss": 0.2055, "step": 2599 }, { "epoch": 0.8118023573491531, "grad_norm": 0.6307751536369324, "learning_rate": 5e-05, "loss": 0.2412, "step": 2600 }, { "epoch": 0.8121145890250566, "grad_norm": 0.605438768863678, "learning_rate": 5e-05, "loss": 0.2132, "step": 2601 }, { "epoch": 0.8124268207009601, "grad_norm": 0.5809692144393921, "learning_rate": 5e-05, "loss": 0.2129, "step": 2602 }, { "epoch": 0.8127390523768636, "grad_norm": 0.6052108407020569, "learning_rate": 5e-05, "loss": 0.2113, "step": 2603 }, { "epoch": 0.8130512840527672, "grad_norm": 0.6058691143989563, "learning_rate": 5e-05, "loss": 0.2155, "step": 2604 }, { "epoch": 0.8133635157286707, "grad_norm": 0.6054551005363464, "learning_rate": 5e-05, "loss": 0.2144, "step": 2605 }, { "epoch": 0.8136757474045742, "grad_norm": 0.5964060425758362, "learning_rate": 5e-05, "loss": 0.2073, "step": 2606 }, { "epoch": 0.8139879790804777, "grad_norm": 0.6090664267539978, "learning_rate": 5e-05, "loss": 0.2164, "step": 2607 }, { "epoch": 0.8143002107563813, "grad_norm": 0.622368574142456, "learning_rate": 5e-05, "loss": 0.216, "step": 2608 }, { "epoch": 0.8146124424322847, "grad_norm": 0.5884513258934021, "learning_rate": 5e-05, "loss": 0.2125, "step": 2609 }, { "epoch": 0.8149246741081883, "grad_norm": 0.6231403946876526, "learning_rate": 5e-05, "loss": 0.2122, "step": 2610 }, { "epoch": 0.8152369057840918, "grad_norm": 0.6111131310462952, "learning_rate": 5e-05, "loss": 0.2277, "step": 2611 }, { "epoch": 0.8155491374599954, "grad_norm": 0.5914925932884216, "learning_rate": 5e-05, "loss": 0.2173, "step": 2612 }, { "epoch": 0.8158613691358988, "grad_norm": 0.5855222344398499, "learning_rate": 5e-05, "loss": 0.2086, "step": 2613 }, { "epoch": 0.8161736008118023, "grad_norm": 0.5830764174461365, "learning_rate": 5e-05, "loss": 0.2057, "step": 2614 }, { "epoch": 0.8164858324877059, "grad_norm": 0.6238372921943665, "learning_rate": 5e-05, "loss": 0.2454, "step": 2615 }, { "epoch": 0.8167980641636094, "grad_norm": 0.6173006892204285, "learning_rate": 5e-05, "loss": 0.2178, "step": 2616 }, { "epoch": 0.8171102958395129, "grad_norm": 0.6170943975448608, "learning_rate": 5e-05, "loss": 0.2266, "step": 2617 }, { "epoch": 0.8174225275154164, "grad_norm": 0.5857456922531128, "learning_rate": 5e-05, "loss": 0.2137, "step": 2618 }, { "epoch": 0.81773475919132, "grad_norm": 0.6212829947471619, "learning_rate": 5e-05, "loss": 0.2184, "step": 2619 }, { "epoch": 0.8180469908672234, "grad_norm": 0.6116098165512085, "learning_rate": 5e-05, "loss": 0.2202, "step": 2620 }, { "epoch": 0.818359222543127, "grad_norm": 0.5828195810317993, "learning_rate": 5e-05, "loss": 0.2109, "step": 2621 }, { "epoch": 0.8186714542190305, "grad_norm": 0.6011711359024048, "learning_rate": 5e-05, "loss": 0.2124, "step": 2622 }, { "epoch": 0.8189836858949341, "grad_norm": 0.6456476449966431, "learning_rate": 5e-05, "loss": 0.2228, "step": 2623 }, { "epoch": 0.8192959175708375, "grad_norm": 0.602075457572937, "learning_rate": 5e-05, "loss": 0.2052, "step": 2624 }, { "epoch": 0.8196081492467411, "grad_norm": 0.601361095905304, "learning_rate": 5e-05, "loss": 0.2177, "step": 2625 }, { "epoch": 0.8199203809226446, "grad_norm": 0.613849401473999, "learning_rate": 5e-05, "loss": 0.2101, "step": 2626 }, { "epoch": 0.8202326125985481, "grad_norm": 0.5934547185897827, "learning_rate": 5e-05, "loss": 0.2156, "step": 2627 }, { "epoch": 0.8205448442744516, "grad_norm": 0.5958773493766785, "learning_rate": 5e-05, "loss": 0.2033, "step": 2628 }, { "epoch": 0.8208570759503552, "grad_norm": 0.593434751033783, "learning_rate": 5e-05, "loss": 0.2124, "step": 2629 }, { "epoch": 0.8211693076262587, "grad_norm": 0.6220933794975281, "learning_rate": 5e-05, "loss": 0.2386, "step": 2630 }, { "epoch": 0.8214815393021622, "grad_norm": 0.6251571774482727, "learning_rate": 5e-05, "loss": 0.2209, "step": 2631 }, { "epoch": 0.8217937709780657, "grad_norm": 0.6145174503326416, "learning_rate": 5e-05, "loss": 0.2273, "step": 2632 }, { "epoch": 0.8221060026539693, "grad_norm": 0.6729466915130615, "learning_rate": 5e-05, "loss": 0.2307, "step": 2633 }, { "epoch": 0.8224182343298727, "grad_norm": 0.6247192621231079, "learning_rate": 5e-05, "loss": 0.2303, "step": 2634 }, { "epoch": 0.8227304660057763, "grad_norm": 0.5835302472114563, "learning_rate": 5e-05, "loss": 0.2076, "step": 2635 }, { "epoch": 0.8230426976816798, "grad_norm": 0.5908803939819336, "learning_rate": 5e-05, "loss": 0.2085, "step": 2636 }, { "epoch": 0.8233549293575834, "grad_norm": 0.6428073048591614, "learning_rate": 5e-05, "loss": 0.1994, "step": 2637 }, { "epoch": 0.8236671610334868, "grad_norm": 0.6068177223205566, "learning_rate": 5e-05, "loss": 0.2069, "step": 2638 }, { "epoch": 0.8239793927093904, "grad_norm": 0.6143958568572998, "learning_rate": 5e-05, "loss": 0.2268, "step": 2639 }, { "epoch": 0.8242916243852939, "grad_norm": 0.5704418420791626, "learning_rate": 5e-05, "loss": 0.1902, "step": 2640 }, { "epoch": 0.8246038560611975, "grad_norm": 0.5657652020454407, "learning_rate": 5e-05, "loss": 0.2031, "step": 2641 }, { "epoch": 0.8249160877371009, "grad_norm": 0.5941649675369263, "learning_rate": 5e-05, "loss": 0.2086, "step": 2642 }, { "epoch": 0.8252283194130045, "grad_norm": 0.6334941983222961, "learning_rate": 5e-05, "loss": 0.2236, "step": 2643 }, { "epoch": 0.825540551088908, "grad_norm": 0.6064763069152832, "learning_rate": 5e-05, "loss": 0.2122, "step": 2644 }, { "epoch": 0.8258527827648114, "grad_norm": 0.61709064245224, "learning_rate": 5e-05, "loss": 0.1991, "step": 2645 }, { "epoch": 0.826165014440715, "grad_norm": 0.5891239047050476, "learning_rate": 5e-05, "loss": 0.195, "step": 2646 }, { "epoch": 0.8264772461166185, "grad_norm": 0.617191731929779, "learning_rate": 5e-05, "loss": 0.216, "step": 2647 }, { "epoch": 0.8267894777925221, "grad_norm": 0.6310410499572754, "learning_rate": 5e-05, "loss": 0.2227, "step": 2648 }, { "epoch": 0.8271017094684255, "grad_norm": 0.6048430800437927, "learning_rate": 5e-05, "loss": 0.2137, "step": 2649 }, { "epoch": 0.8274139411443291, "grad_norm": 0.6155039668083191, "learning_rate": 5e-05, "loss": 0.2185, "step": 2650 }, { "epoch": 0.8277261728202326, "grad_norm": 0.612384557723999, "learning_rate": 5e-05, "loss": 0.2158, "step": 2651 }, { "epoch": 0.8280384044961361, "grad_norm": 0.6114656925201416, "learning_rate": 5e-05, "loss": 0.2287, "step": 2652 }, { "epoch": 0.8283506361720396, "grad_norm": 0.6231926083564758, "learning_rate": 5e-05, "loss": 0.2188, "step": 2653 }, { "epoch": 0.8286628678479432, "grad_norm": 0.5940223932266235, "learning_rate": 5e-05, "loss": 0.2042, "step": 2654 }, { "epoch": 0.8289750995238467, "grad_norm": 0.5913362503051758, "learning_rate": 5e-05, "loss": 0.2085, "step": 2655 }, { "epoch": 0.8292873311997502, "grad_norm": 0.5817930698394775, "learning_rate": 5e-05, "loss": 0.203, "step": 2656 }, { "epoch": 0.8295995628756537, "grad_norm": 0.6367718577384949, "learning_rate": 5e-05, "loss": 0.2134, "step": 2657 }, { "epoch": 0.8299117945515573, "grad_norm": 0.6204866170883179, "learning_rate": 5e-05, "loss": 0.2327, "step": 2658 }, { "epoch": 0.8302240262274607, "grad_norm": 0.5772122740745544, "learning_rate": 5e-05, "loss": 0.2065, "step": 2659 }, { "epoch": 0.8305362579033643, "grad_norm": 0.5900555849075317, "learning_rate": 5e-05, "loss": 0.2051, "step": 2660 }, { "epoch": 0.8308484895792678, "grad_norm": 0.5808184146881104, "learning_rate": 5e-05, "loss": 0.1905, "step": 2661 }, { "epoch": 0.8311607212551714, "grad_norm": 0.5723064541816711, "learning_rate": 5e-05, "loss": 0.1988, "step": 2662 }, { "epoch": 0.8314729529310748, "grad_norm": 0.6063392758369446, "learning_rate": 5e-05, "loss": 0.216, "step": 2663 }, { "epoch": 0.8317851846069784, "grad_norm": 0.6436545848846436, "learning_rate": 5e-05, "loss": 0.2345, "step": 2664 }, { "epoch": 0.8320974162828819, "grad_norm": 0.5629483461380005, "learning_rate": 5e-05, "loss": 0.1969, "step": 2665 }, { "epoch": 0.8324096479587855, "grad_norm": 0.6050356030464172, "learning_rate": 5e-05, "loss": 0.21, "step": 2666 }, { "epoch": 0.8327218796346889, "grad_norm": 0.6156876087188721, "learning_rate": 5e-05, "loss": 0.2231, "step": 2667 }, { "epoch": 0.8330341113105925, "grad_norm": 0.6059510111808777, "learning_rate": 5e-05, "loss": 0.2203, "step": 2668 }, { "epoch": 0.833346342986496, "grad_norm": 0.5794352293014526, "learning_rate": 5e-05, "loss": 0.205, "step": 2669 }, { "epoch": 0.8336585746623995, "grad_norm": 0.6267730593681335, "learning_rate": 5e-05, "loss": 0.2374, "step": 2670 }, { "epoch": 0.833970806338303, "grad_norm": 0.6221625804901123, "learning_rate": 5e-05, "loss": 0.2221, "step": 2671 }, { "epoch": 0.8342830380142066, "grad_norm": 0.5853833556175232, "learning_rate": 5e-05, "loss": 0.2039, "step": 2672 }, { "epoch": 0.8345952696901101, "grad_norm": 0.601717472076416, "learning_rate": 5e-05, "loss": 0.2343, "step": 2673 }, { "epoch": 0.8349075013660135, "grad_norm": 0.6409656405448914, "learning_rate": 5e-05, "loss": 0.2071, "step": 2674 }, { "epoch": 0.8352197330419171, "grad_norm": 0.5860868692398071, "learning_rate": 5e-05, "loss": 0.201, "step": 2675 }, { "epoch": 0.8355319647178207, "grad_norm": 0.6111805438995361, "learning_rate": 5e-05, "loss": 0.2183, "step": 2676 }, { "epoch": 0.8358441963937241, "grad_norm": 0.6084411144256592, "learning_rate": 5e-05, "loss": 0.223, "step": 2677 }, { "epoch": 0.8361564280696276, "grad_norm": 0.6335225105285645, "learning_rate": 5e-05, "loss": 0.2237, "step": 2678 }, { "epoch": 0.8364686597455312, "grad_norm": 0.5900620818138123, "learning_rate": 5e-05, "loss": 0.2076, "step": 2679 }, { "epoch": 0.8367808914214347, "grad_norm": 0.5941075682640076, "learning_rate": 5e-05, "loss": 0.2155, "step": 2680 }, { "epoch": 0.8370931230973382, "grad_norm": 0.6015505790710449, "learning_rate": 5e-05, "loss": 0.204, "step": 2681 }, { "epoch": 0.8374053547732417, "grad_norm": 0.5689705014228821, "learning_rate": 5e-05, "loss": 0.2104, "step": 2682 }, { "epoch": 0.8377175864491453, "grad_norm": 0.6017158627510071, "learning_rate": 5e-05, "loss": 0.2045, "step": 2683 }, { "epoch": 0.8380298181250487, "grad_norm": 0.6141164302825928, "learning_rate": 5e-05, "loss": 0.2137, "step": 2684 }, { "epoch": 0.8383420498009523, "grad_norm": 0.599369466304779, "learning_rate": 5e-05, "loss": 0.2148, "step": 2685 }, { "epoch": 0.8386542814768558, "grad_norm": 0.6115162372589111, "learning_rate": 5e-05, "loss": 0.2089, "step": 2686 }, { "epoch": 0.8389665131527594, "grad_norm": 0.6284300684928894, "learning_rate": 5e-05, "loss": 0.222, "step": 2687 }, { "epoch": 0.8392787448286628, "grad_norm": 0.615304172039032, "learning_rate": 5e-05, "loss": 0.2122, "step": 2688 }, { "epoch": 0.8395909765045664, "grad_norm": 0.6125087141990662, "learning_rate": 5e-05, "loss": 0.2293, "step": 2689 }, { "epoch": 0.8399032081804699, "grad_norm": 0.6281982660293579, "learning_rate": 5e-05, "loss": 0.2074, "step": 2690 }, { "epoch": 0.8402154398563735, "grad_norm": 0.6213053464889526, "learning_rate": 5e-05, "loss": 0.2024, "step": 2691 }, { "epoch": 0.8405276715322769, "grad_norm": 0.5784124732017517, "learning_rate": 5e-05, "loss": 0.2087, "step": 2692 }, { "epoch": 0.8408399032081805, "grad_norm": 0.5686057209968567, "learning_rate": 5e-05, "loss": 0.1964, "step": 2693 }, { "epoch": 0.841152134884084, "grad_norm": 0.5856736302375793, "learning_rate": 5e-05, "loss": 0.2129, "step": 2694 }, { "epoch": 0.8414643665599875, "grad_norm": 0.6116244792938232, "learning_rate": 5e-05, "loss": 0.2152, "step": 2695 }, { "epoch": 0.841776598235891, "grad_norm": 0.6540189385414124, "learning_rate": 5e-05, "loss": 0.2253, "step": 2696 }, { "epoch": 0.8420888299117946, "grad_norm": 0.6170265078544617, "learning_rate": 5e-05, "loss": 0.2161, "step": 2697 }, { "epoch": 0.8424010615876981, "grad_norm": 0.6280787587165833, "learning_rate": 5e-05, "loss": 0.1995, "step": 2698 }, { "epoch": 0.8427132932636016, "grad_norm": 0.5690557956695557, "learning_rate": 5e-05, "loss": 0.2039, "step": 2699 }, { "epoch": 0.8430255249395051, "grad_norm": 0.6009449362754822, "learning_rate": 5e-05, "loss": 0.2169, "step": 2700 }, { "epoch": 0.8433377566154087, "grad_norm": 0.6271793246269226, "learning_rate": 5e-05, "loss": 0.2205, "step": 2701 }, { "epoch": 0.8436499882913121, "grad_norm": 0.629973292350769, "learning_rate": 5e-05, "loss": 0.2414, "step": 2702 }, { "epoch": 0.8439622199672157, "grad_norm": 0.6278834342956543, "learning_rate": 5e-05, "loss": 0.2265, "step": 2703 }, { "epoch": 0.8442744516431192, "grad_norm": 0.6516702771186829, "learning_rate": 5e-05, "loss": 0.2311, "step": 2704 }, { "epoch": 0.8445866833190228, "grad_norm": 0.6068213582038879, "learning_rate": 5e-05, "loss": 0.2121, "step": 2705 }, { "epoch": 0.8448989149949262, "grad_norm": 0.5736337900161743, "learning_rate": 5e-05, "loss": 0.2053, "step": 2706 }, { "epoch": 0.8452111466708297, "grad_norm": 0.6304792165756226, "learning_rate": 5e-05, "loss": 0.2349, "step": 2707 }, { "epoch": 0.8455233783467333, "grad_norm": 0.611997127532959, "learning_rate": 5e-05, "loss": 0.2301, "step": 2708 }, { "epoch": 0.8458356100226367, "grad_norm": 0.6070419549942017, "learning_rate": 5e-05, "loss": 0.2128, "step": 2709 }, { "epoch": 0.8461478416985403, "grad_norm": 0.6503846645355225, "learning_rate": 5e-05, "loss": 0.2088, "step": 2710 }, { "epoch": 0.8464600733744438, "grad_norm": 0.6251323819160461, "learning_rate": 5e-05, "loss": 0.2116, "step": 2711 }, { "epoch": 0.8467723050503474, "grad_norm": 0.609963595867157, "learning_rate": 5e-05, "loss": 0.2213, "step": 2712 }, { "epoch": 0.8470845367262508, "grad_norm": 0.6150239109992981, "learning_rate": 5e-05, "loss": 0.2111, "step": 2713 }, { "epoch": 0.8473967684021544, "grad_norm": 0.6393603086471558, "learning_rate": 5e-05, "loss": 0.2206, "step": 2714 }, { "epoch": 0.8477090000780579, "grad_norm": 0.579741358757019, "learning_rate": 5e-05, "loss": 0.205, "step": 2715 }, { "epoch": 0.8480212317539615, "grad_norm": 0.6033657193183899, "learning_rate": 5e-05, "loss": 0.2198, "step": 2716 }, { "epoch": 0.8483334634298649, "grad_norm": 0.5600584149360657, "learning_rate": 5e-05, "loss": 0.2059, "step": 2717 }, { "epoch": 0.8486456951057685, "grad_norm": 0.5718576312065125, "learning_rate": 5e-05, "loss": 0.1863, "step": 2718 }, { "epoch": 0.848957926781672, "grad_norm": 0.6034554243087769, "learning_rate": 5e-05, "loss": 0.2078, "step": 2719 }, { "epoch": 0.8492701584575755, "grad_norm": 0.6057372689247131, "learning_rate": 5e-05, "loss": 0.2076, "step": 2720 }, { "epoch": 0.849582390133479, "grad_norm": 0.5984117984771729, "learning_rate": 5e-05, "loss": 0.2171, "step": 2721 }, { "epoch": 0.8498946218093826, "grad_norm": 0.6019535064697266, "learning_rate": 5e-05, "loss": 0.2228, "step": 2722 }, { "epoch": 0.8502068534852861, "grad_norm": 0.5692775845527649, "learning_rate": 5e-05, "loss": 0.2092, "step": 2723 }, { "epoch": 0.8505190851611896, "grad_norm": 0.5846754312515259, "learning_rate": 5e-05, "loss": 0.209, "step": 2724 }, { "epoch": 0.8508313168370931, "grad_norm": 0.6105127930641174, "learning_rate": 5e-05, "loss": 0.2118, "step": 2725 }, { "epoch": 0.8511435485129967, "grad_norm": 0.59531569480896, "learning_rate": 5e-05, "loss": 0.2085, "step": 2726 }, { "epoch": 0.8514557801889001, "grad_norm": 0.6335719227790833, "learning_rate": 5e-05, "loss": 0.2144, "step": 2727 }, { "epoch": 0.8517680118648037, "grad_norm": 0.5853345990180969, "learning_rate": 5e-05, "loss": 0.2196, "step": 2728 }, { "epoch": 0.8520802435407072, "grad_norm": 0.6407011151313782, "learning_rate": 5e-05, "loss": 0.2166, "step": 2729 }, { "epoch": 0.8523924752166108, "grad_norm": 0.6242491006851196, "learning_rate": 5e-05, "loss": 0.221, "step": 2730 }, { "epoch": 0.8527047068925142, "grad_norm": 0.6263488531112671, "learning_rate": 5e-05, "loss": 0.2232, "step": 2731 }, { "epoch": 0.8530169385684178, "grad_norm": 0.5768280625343323, "learning_rate": 5e-05, "loss": 0.2097, "step": 2732 }, { "epoch": 0.8533291702443213, "grad_norm": 0.5816516876220703, "learning_rate": 5e-05, "loss": 0.1968, "step": 2733 }, { "epoch": 0.8536414019202248, "grad_norm": 0.5999754667282104, "learning_rate": 5e-05, "loss": 0.2195, "step": 2734 }, { "epoch": 0.8539536335961283, "grad_norm": 0.5848363637924194, "learning_rate": 5e-05, "loss": 0.2046, "step": 2735 }, { "epoch": 0.8542658652720319, "grad_norm": 0.6396404504776001, "learning_rate": 5e-05, "loss": 0.229, "step": 2736 }, { "epoch": 0.8545780969479354, "grad_norm": 0.5890987515449524, "learning_rate": 5e-05, "loss": 0.2112, "step": 2737 }, { "epoch": 0.8548903286238388, "grad_norm": 0.596679151058197, "learning_rate": 5e-05, "loss": 0.233, "step": 2738 }, { "epoch": 0.8552025602997424, "grad_norm": 0.5700883269309998, "learning_rate": 5e-05, "loss": 0.2044, "step": 2739 }, { "epoch": 0.855514791975646, "grad_norm": 0.599113941192627, "learning_rate": 5e-05, "loss": 0.2196, "step": 2740 }, { "epoch": 0.8558270236515495, "grad_norm": 0.576764702796936, "learning_rate": 5e-05, "loss": 0.2101, "step": 2741 }, { "epoch": 0.8561392553274529, "grad_norm": 0.5567811727523804, "learning_rate": 5e-05, "loss": 0.205, "step": 2742 }, { "epoch": 0.8564514870033565, "grad_norm": 0.607046902179718, "learning_rate": 5e-05, "loss": 0.2192, "step": 2743 }, { "epoch": 0.85676371867926, "grad_norm": 0.5919545888900757, "learning_rate": 5e-05, "loss": 0.205, "step": 2744 }, { "epoch": 0.8570759503551635, "grad_norm": 0.5538598895072937, "learning_rate": 5e-05, "loss": 0.2014, "step": 2745 }, { "epoch": 0.857388182031067, "grad_norm": 0.6051287055015564, "learning_rate": 5e-05, "loss": 0.2193, "step": 2746 }, { "epoch": 0.8577004137069706, "grad_norm": 0.5757149457931519, "learning_rate": 5e-05, "loss": 0.2239, "step": 2747 }, { "epoch": 0.8580126453828741, "grad_norm": 0.5943527817726135, "learning_rate": 5e-05, "loss": 0.2005, "step": 2748 }, { "epoch": 0.8583248770587776, "grad_norm": 0.6323142647743225, "learning_rate": 5e-05, "loss": 0.2371, "step": 2749 }, { "epoch": 0.8586371087346811, "grad_norm": 0.6299845576286316, "learning_rate": 5e-05, "loss": 0.2116, "step": 2750 }, { "epoch": 0.8589493404105847, "grad_norm": 0.618083119392395, "learning_rate": 5e-05, "loss": 0.1963, "step": 2751 }, { "epoch": 0.8592615720864881, "grad_norm": 0.642321765422821, "learning_rate": 5e-05, "loss": 0.2227, "step": 2752 }, { "epoch": 0.8595738037623917, "grad_norm": 0.6081382036209106, "learning_rate": 5e-05, "loss": 0.2149, "step": 2753 }, { "epoch": 0.8598860354382952, "grad_norm": 0.6123448610305786, "learning_rate": 5e-05, "loss": 0.2012, "step": 2754 }, { "epoch": 0.8601982671141988, "grad_norm": 0.6479710936546326, "learning_rate": 5e-05, "loss": 0.2188, "step": 2755 }, { "epoch": 0.8605104987901022, "grad_norm": 0.5999210476875305, "learning_rate": 5e-05, "loss": 0.2156, "step": 2756 }, { "epoch": 0.8608227304660058, "grad_norm": 0.5730411410331726, "learning_rate": 5e-05, "loss": 0.202, "step": 2757 }, { "epoch": 0.8611349621419093, "grad_norm": 0.5865375995635986, "learning_rate": 5e-05, "loss": 0.2004, "step": 2758 }, { "epoch": 0.8614471938178128, "grad_norm": 0.6309589147567749, "learning_rate": 5e-05, "loss": 0.2169, "step": 2759 }, { "epoch": 0.8617594254937163, "grad_norm": 0.6016408205032349, "learning_rate": 5e-05, "loss": 0.2221, "step": 2760 }, { "epoch": 0.8620716571696199, "grad_norm": 0.5715484023094177, "learning_rate": 5e-05, "loss": 0.1952, "step": 2761 }, { "epoch": 0.8623838888455234, "grad_norm": 0.6063149571418762, "learning_rate": 5e-05, "loss": 0.2283, "step": 2762 }, { "epoch": 0.8626961205214269, "grad_norm": 0.557715654373169, "learning_rate": 5e-05, "loss": 0.2095, "step": 2763 }, { "epoch": 0.8630083521973304, "grad_norm": 0.5678241848945618, "learning_rate": 5e-05, "loss": 0.2037, "step": 2764 }, { "epoch": 0.863320583873234, "grad_norm": 0.6237371563911438, "learning_rate": 5e-05, "loss": 0.2263, "step": 2765 }, { "epoch": 0.8636328155491375, "grad_norm": 0.5772731900215149, "learning_rate": 5e-05, "loss": 0.2041, "step": 2766 }, { "epoch": 0.863945047225041, "grad_norm": 0.5625048875808716, "learning_rate": 5e-05, "loss": 0.1983, "step": 2767 }, { "epoch": 0.8642572789009445, "grad_norm": 0.6244677901268005, "learning_rate": 5e-05, "loss": 0.2244, "step": 2768 }, { "epoch": 0.864569510576848, "grad_norm": 0.5698904991149902, "learning_rate": 5e-05, "loss": 0.1981, "step": 2769 }, { "epoch": 0.8648817422527515, "grad_norm": 0.6293028593063354, "learning_rate": 5e-05, "loss": 0.2176, "step": 2770 }, { "epoch": 0.865193973928655, "grad_norm": 0.6270984411239624, "learning_rate": 5e-05, "loss": 0.2211, "step": 2771 }, { "epoch": 0.8655062056045586, "grad_norm": 0.586211621761322, "learning_rate": 5e-05, "loss": 0.1991, "step": 2772 }, { "epoch": 0.8658184372804621, "grad_norm": 0.5948528051376343, "learning_rate": 5e-05, "loss": 0.2154, "step": 2773 }, { "epoch": 0.8661306689563656, "grad_norm": 0.6063405275344849, "learning_rate": 5e-05, "loss": 0.2184, "step": 2774 }, { "epoch": 0.8664429006322691, "grad_norm": 0.6010774374008179, "learning_rate": 5e-05, "loss": 0.2108, "step": 2775 }, { "epoch": 0.8667551323081727, "grad_norm": 0.6510669589042664, "learning_rate": 5e-05, "loss": 0.2278, "step": 2776 }, { "epoch": 0.8670673639840761, "grad_norm": 0.6032475233078003, "learning_rate": 5e-05, "loss": 0.223, "step": 2777 }, { "epoch": 0.8673795956599797, "grad_norm": 0.6010823249816895, "learning_rate": 5e-05, "loss": 0.2121, "step": 2778 }, { "epoch": 0.8676918273358832, "grad_norm": 0.6124915480613708, "learning_rate": 5e-05, "loss": 0.2127, "step": 2779 }, { "epoch": 0.8680040590117868, "grad_norm": 0.5797703266143799, "learning_rate": 5e-05, "loss": 0.2173, "step": 2780 }, { "epoch": 0.8683162906876902, "grad_norm": 0.5975263714790344, "learning_rate": 5e-05, "loss": 0.2019, "step": 2781 }, { "epoch": 0.8686285223635938, "grad_norm": 0.5822126865386963, "learning_rate": 5e-05, "loss": 0.2178, "step": 2782 }, { "epoch": 0.8689407540394973, "grad_norm": 0.6389249563217163, "learning_rate": 5e-05, "loss": 0.2293, "step": 2783 }, { "epoch": 0.8692529857154008, "grad_norm": 0.5770505666732788, "learning_rate": 5e-05, "loss": 0.1986, "step": 2784 }, { "epoch": 0.8695652173913043, "grad_norm": 0.6118977665901184, "learning_rate": 5e-05, "loss": 0.2074, "step": 2785 }, { "epoch": 0.8698774490672079, "grad_norm": 0.6069666147232056, "learning_rate": 5e-05, "loss": 0.2133, "step": 2786 }, { "epoch": 0.8701896807431114, "grad_norm": 0.589317262172699, "learning_rate": 5e-05, "loss": 0.2071, "step": 2787 }, { "epoch": 0.8705019124190149, "grad_norm": 0.5878819227218628, "learning_rate": 5e-05, "loss": 0.1996, "step": 2788 }, { "epoch": 0.8708141440949184, "grad_norm": 0.65374755859375, "learning_rate": 5e-05, "loss": 0.2231, "step": 2789 }, { "epoch": 0.871126375770822, "grad_norm": 0.6230857968330383, "learning_rate": 5e-05, "loss": 0.2188, "step": 2790 }, { "epoch": 0.8714386074467255, "grad_norm": 0.5977216958999634, "learning_rate": 5e-05, "loss": 0.2174, "step": 2791 }, { "epoch": 0.871750839122629, "grad_norm": 0.6356447339057922, "learning_rate": 5e-05, "loss": 0.1959, "step": 2792 }, { "epoch": 0.8720630707985325, "grad_norm": 0.596777617931366, "learning_rate": 5e-05, "loss": 0.2123, "step": 2793 }, { "epoch": 0.8723753024744361, "grad_norm": 0.5553752779960632, "learning_rate": 5e-05, "loss": 0.2001, "step": 2794 }, { "epoch": 0.8726875341503395, "grad_norm": 0.6152283549308777, "learning_rate": 5e-05, "loss": 0.2077, "step": 2795 }, { "epoch": 0.8729997658262431, "grad_norm": 0.6021025776863098, "learning_rate": 5e-05, "loss": 0.2026, "step": 2796 }, { "epoch": 0.8733119975021466, "grad_norm": 0.6127616763114929, "learning_rate": 5e-05, "loss": 0.2251, "step": 2797 }, { "epoch": 0.8736242291780502, "grad_norm": 0.5928182601928711, "learning_rate": 5e-05, "loss": 0.2108, "step": 2798 }, { "epoch": 0.8739364608539536, "grad_norm": 0.6424016952514648, "learning_rate": 5e-05, "loss": 0.215, "step": 2799 }, { "epoch": 0.8742486925298572, "grad_norm": 0.5903114080429077, "learning_rate": 5e-05, "loss": 0.2171, "step": 2800 }, { "epoch": 0.8745609242057607, "grad_norm": 0.6012512445449829, "learning_rate": 5e-05, "loss": 0.2125, "step": 2801 }, { "epoch": 0.8748731558816641, "grad_norm": 0.5778540968894958, "learning_rate": 5e-05, "loss": 0.2043, "step": 2802 }, { "epoch": 0.8751853875575677, "grad_norm": 0.5865307450294495, "learning_rate": 5e-05, "loss": 0.2245, "step": 2803 }, { "epoch": 0.8754976192334712, "grad_norm": 0.6283510327339172, "learning_rate": 5e-05, "loss": 0.2151, "step": 2804 }, { "epoch": 0.8758098509093748, "grad_norm": 0.5774958729743958, "learning_rate": 5e-05, "loss": 0.2122, "step": 2805 }, { "epoch": 0.8761220825852782, "grad_norm": 0.5846928358078003, "learning_rate": 5e-05, "loss": 0.2079, "step": 2806 }, { "epoch": 0.8764343142611818, "grad_norm": 0.5598517060279846, "learning_rate": 5e-05, "loss": 0.1985, "step": 2807 }, { "epoch": 0.8767465459370853, "grad_norm": 0.5959035158157349, "learning_rate": 5e-05, "loss": 0.2439, "step": 2808 }, { "epoch": 0.8770587776129888, "grad_norm": 0.6100504398345947, "learning_rate": 5e-05, "loss": 0.2219, "step": 2809 }, { "epoch": 0.8773710092888923, "grad_norm": 0.615533173084259, "learning_rate": 5e-05, "loss": 0.2236, "step": 2810 }, { "epoch": 0.8776832409647959, "grad_norm": 0.6549236178398132, "learning_rate": 5e-05, "loss": 0.2338, "step": 2811 }, { "epoch": 0.8779954726406994, "grad_norm": 0.6484773755073547, "learning_rate": 5e-05, "loss": 0.2206, "step": 2812 }, { "epoch": 0.8783077043166029, "grad_norm": 0.6304433345794678, "learning_rate": 5e-05, "loss": 0.2275, "step": 2813 }, { "epoch": 0.8786199359925064, "grad_norm": 0.5752373337745667, "learning_rate": 5e-05, "loss": 0.2105, "step": 2814 }, { "epoch": 0.87893216766841, "grad_norm": 0.588407039642334, "learning_rate": 5e-05, "loss": 0.2132, "step": 2815 }, { "epoch": 0.8792443993443135, "grad_norm": 0.6112781763076782, "learning_rate": 5e-05, "loss": 0.2163, "step": 2816 }, { "epoch": 0.879556631020217, "grad_norm": 0.5853917598724365, "learning_rate": 5e-05, "loss": 0.2164, "step": 2817 }, { "epoch": 0.8798688626961205, "grad_norm": 0.5883681774139404, "learning_rate": 5e-05, "loss": 0.2118, "step": 2818 }, { "epoch": 0.8801810943720241, "grad_norm": 0.599643886089325, "learning_rate": 5e-05, "loss": 0.2084, "step": 2819 }, { "epoch": 0.8804933260479275, "grad_norm": 0.5931037068367004, "learning_rate": 5e-05, "loss": 0.1957, "step": 2820 }, { "epoch": 0.8808055577238311, "grad_norm": 0.569610059261322, "learning_rate": 5e-05, "loss": 0.2022, "step": 2821 }, { "epoch": 0.8811177893997346, "grad_norm": 0.6555206775665283, "learning_rate": 5e-05, "loss": 0.1987, "step": 2822 }, { "epoch": 0.8814300210756382, "grad_norm": 0.6022423505783081, "learning_rate": 5e-05, "loss": 0.226, "step": 2823 }, { "epoch": 0.8817422527515416, "grad_norm": 0.576389491558075, "learning_rate": 5e-05, "loss": 0.2068, "step": 2824 }, { "epoch": 0.8820544844274452, "grad_norm": 0.5839639902114868, "learning_rate": 5e-05, "loss": 0.2099, "step": 2825 }, { "epoch": 0.8823667161033487, "grad_norm": 0.5735760927200317, "learning_rate": 5e-05, "loss": 0.2098, "step": 2826 }, { "epoch": 0.8826789477792522, "grad_norm": 0.5650021433830261, "learning_rate": 5e-05, "loss": 0.2046, "step": 2827 }, { "epoch": 0.8829911794551557, "grad_norm": 0.5685709118843079, "learning_rate": 5e-05, "loss": 0.1899, "step": 2828 }, { "epoch": 0.8833034111310593, "grad_norm": 0.6046416759490967, "learning_rate": 5e-05, "loss": 0.2005, "step": 2829 }, { "epoch": 0.8836156428069628, "grad_norm": 0.586648166179657, "learning_rate": 5e-05, "loss": 0.2075, "step": 2830 }, { "epoch": 0.8839278744828662, "grad_norm": 0.6136389970779419, "learning_rate": 5e-05, "loss": 0.2199, "step": 2831 }, { "epoch": 0.8842401061587698, "grad_norm": 0.591524600982666, "learning_rate": 5e-05, "loss": 0.202, "step": 2832 }, { "epoch": 0.8845523378346734, "grad_norm": 0.6650885343551636, "learning_rate": 5e-05, "loss": 0.2263, "step": 2833 }, { "epoch": 0.8848645695105768, "grad_norm": 0.6223092079162598, "learning_rate": 5e-05, "loss": 0.2192, "step": 2834 }, { "epoch": 0.8851768011864803, "grad_norm": 0.656318724155426, "learning_rate": 5e-05, "loss": 0.2266, "step": 2835 }, { "epoch": 0.8854890328623839, "grad_norm": 0.5850140452384949, "learning_rate": 5e-05, "loss": 0.2026, "step": 2836 }, { "epoch": 0.8858012645382874, "grad_norm": 0.5995323061943054, "learning_rate": 5e-05, "loss": 0.2106, "step": 2837 }, { "epoch": 0.8861134962141909, "grad_norm": 0.5778577327728271, "learning_rate": 5e-05, "loss": 0.2077, "step": 2838 }, { "epoch": 0.8864257278900944, "grad_norm": 0.5899009704589844, "learning_rate": 5e-05, "loss": 0.2072, "step": 2839 }, { "epoch": 0.886737959565998, "grad_norm": 0.6219987869262695, "learning_rate": 5e-05, "loss": 0.2153, "step": 2840 }, { "epoch": 0.8870501912419015, "grad_norm": 0.6142792701721191, "learning_rate": 5e-05, "loss": 0.2146, "step": 2841 }, { "epoch": 0.887362422917805, "grad_norm": 0.5850244760513306, "learning_rate": 5e-05, "loss": 0.2042, "step": 2842 }, { "epoch": 0.8876746545937085, "grad_norm": 0.6046991348266602, "learning_rate": 5e-05, "loss": 0.2176, "step": 2843 }, { "epoch": 0.8879868862696121, "grad_norm": 0.602067232131958, "learning_rate": 5e-05, "loss": 0.2192, "step": 2844 }, { "epoch": 0.8882991179455155, "grad_norm": 0.5829529762268066, "learning_rate": 5e-05, "loss": 0.2054, "step": 2845 }, { "epoch": 0.8886113496214191, "grad_norm": 0.6193014979362488, "learning_rate": 5e-05, "loss": 0.2204, "step": 2846 }, { "epoch": 0.8889235812973226, "grad_norm": 0.620915949344635, "learning_rate": 5e-05, "loss": 0.2323, "step": 2847 }, { "epoch": 0.8892358129732262, "grad_norm": 0.6427889466285706, "learning_rate": 5e-05, "loss": 0.228, "step": 2848 }, { "epoch": 0.8895480446491296, "grad_norm": 0.6026673316955566, "learning_rate": 5e-05, "loss": 0.2088, "step": 2849 }, { "epoch": 0.8898602763250332, "grad_norm": 0.6002302169799805, "learning_rate": 5e-05, "loss": 0.2096, "step": 2850 }, { "epoch": 0.8901725080009367, "grad_norm": 0.5708884596824646, "learning_rate": 5e-05, "loss": 0.2169, "step": 2851 }, { "epoch": 0.8904847396768402, "grad_norm": 0.5971841216087341, "learning_rate": 5e-05, "loss": 0.2087, "step": 2852 }, { "epoch": 0.8907969713527437, "grad_norm": 0.6099109649658203, "learning_rate": 5e-05, "loss": 0.2224, "step": 2853 }, { "epoch": 0.8911092030286473, "grad_norm": 0.6002428531646729, "learning_rate": 5e-05, "loss": 0.2215, "step": 2854 }, { "epoch": 0.8914214347045508, "grad_norm": 0.59951251745224, "learning_rate": 5e-05, "loss": 0.2125, "step": 2855 }, { "epoch": 0.8917336663804543, "grad_norm": 0.5803591012954712, "learning_rate": 5e-05, "loss": 0.1979, "step": 2856 }, { "epoch": 0.8920458980563578, "grad_norm": 0.6413597464561462, "learning_rate": 5e-05, "loss": 0.2034, "step": 2857 }, { "epoch": 0.8923581297322614, "grad_norm": 0.5522850751876831, "learning_rate": 5e-05, "loss": 0.1961, "step": 2858 }, { "epoch": 0.8926703614081648, "grad_norm": 0.6145394444465637, "learning_rate": 5e-05, "loss": 0.2203, "step": 2859 }, { "epoch": 0.8929825930840684, "grad_norm": 0.6208612322807312, "learning_rate": 5e-05, "loss": 0.2281, "step": 2860 }, { "epoch": 0.8932948247599719, "grad_norm": 0.5768280029296875, "learning_rate": 5e-05, "loss": 0.2032, "step": 2861 }, { "epoch": 0.8936070564358755, "grad_norm": 0.578579306602478, "learning_rate": 5e-05, "loss": 0.1947, "step": 2862 }, { "epoch": 0.8939192881117789, "grad_norm": 0.5537701845169067, "learning_rate": 5e-05, "loss": 0.2054, "step": 2863 }, { "epoch": 0.8942315197876824, "grad_norm": 0.6128050088882446, "learning_rate": 5e-05, "loss": 0.2321, "step": 2864 }, { "epoch": 0.894543751463586, "grad_norm": 0.5878952145576477, "learning_rate": 5e-05, "loss": 0.1928, "step": 2865 }, { "epoch": 0.8948559831394896, "grad_norm": 0.6002106070518494, "learning_rate": 5e-05, "loss": 0.2022, "step": 2866 }, { "epoch": 0.895168214815393, "grad_norm": 0.5804856419563293, "learning_rate": 5e-05, "loss": 0.214, "step": 2867 }, { "epoch": 0.8954804464912965, "grad_norm": 0.5669363737106323, "learning_rate": 5e-05, "loss": 0.1967, "step": 2868 }, { "epoch": 0.8957926781672001, "grad_norm": 0.583095133304596, "learning_rate": 5e-05, "loss": 0.2104, "step": 2869 }, { "epoch": 0.8961049098431035, "grad_norm": 0.6114388108253479, "learning_rate": 5e-05, "loss": 0.2174, "step": 2870 }, { "epoch": 0.8964171415190071, "grad_norm": 0.6118849515914917, "learning_rate": 5e-05, "loss": 0.2166, "step": 2871 }, { "epoch": 0.8967293731949106, "grad_norm": 0.5711870789527893, "learning_rate": 5e-05, "loss": 0.2026, "step": 2872 }, { "epoch": 0.8970416048708142, "grad_norm": 0.5771558880805969, "learning_rate": 5e-05, "loss": 0.21, "step": 2873 }, { "epoch": 0.8973538365467176, "grad_norm": 0.6364434957504272, "learning_rate": 5e-05, "loss": 0.2245, "step": 2874 }, { "epoch": 0.8976660682226212, "grad_norm": 0.5593748688697815, "learning_rate": 5e-05, "loss": 0.1875, "step": 2875 }, { "epoch": 0.8979782998985247, "grad_norm": 0.6289857625961304, "learning_rate": 5e-05, "loss": 0.2244, "step": 2876 }, { "epoch": 0.8982905315744282, "grad_norm": 0.6617119908332825, "learning_rate": 5e-05, "loss": 0.241, "step": 2877 }, { "epoch": 0.8986027632503317, "grad_norm": 0.6115853786468506, "learning_rate": 5e-05, "loss": 0.2033, "step": 2878 }, { "epoch": 0.8989149949262353, "grad_norm": 0.6184801459312439, "learning_rate": 5e-05, "loss": 0.2348, "step": 2879 }, { "epoch": 0.8992272266021388, "grad_norm": 0.58230060338974, "learning_rate": 5e-05, "loss": 0.21, "step": 2880 }, { "epoch": 0.8995394582780423, "grad_norm": 0.6097007393836975, "learning_rate": 5e-05, "loss": 0.2122, "step": 2881 }, { "epoch": 0.8998516899539458, "grad_norm": 0.5954651832580566, "learning_rate": 5e-05, "loss": 0.2169, "step": 2882 }, { "epoch": 0.9001639216298494, "grad_norm": 0.6197828054428101, "learning_rate": 5e-05, "loss": 0.2208, "step": 2883 }, { "epoch": 0.9004761533057528, "grad_norm": 0.602688193321228, "learning_rate": 5e-05, "loss": 0.2345, "step": 2884 }, { "epoch": 0.9007883849816564, "grad_norm": 0.6241123080253601, "learning_rate": 5e-05, "loss": 0.2108, "step": 2885 }, { "epoch": 0.9011006166575599, "grad_norm": 0.572021484375, "learning_rate": 5e-05, "loss": 0.2114, "step": 2886 }, { "epoch": 0.9014128483334635, "grad_norm": 0.6371086239814758, "learning_rate": 5e-05, "loss": 0.2295, "step": 2887 }, { "epoch": 0.9017250800093669, "grad_norm": 0.5837476253509521, "learning_rate": 5e-05, "loss": 0.2154, "step": 2888 }, { "epoch": 0.9020373116852705, "grad_norm": 0.590420663356781, "learning_rate": 5e-05, "loss": 0.21, "step": 2889 }, { "epoch": 0.902349543361174, "grad_norm": 0.6415067911148071, "learning_rate": 5e-05, "loss": 0.2142, "step": 2890 }, { "epoch": 0.9026617750370776, "grad_norm": 0.5901246070861816, "learning_rate": 5e-05, "loss": 0.2107, "step": 2891 }, { "epoch": 0.902974006712981, "grad_norm": 0.579980731010437, "learning_rate": 5e-05, "loss": 0.194, "step": 2892 }, { "epoch": 0.9032862383888846, "grad_norm": 0.6135898232460022, "learning_rate": 5e-05, "loss": 0.208, "step": 2893 }, { "epoch": 0.9035984700647881, "grad_norm": 0.6166167259216309, "learning_rate": 5e-05, "loss": 0.2327, "step": 2894 }, { "epoch": 0.9039107017406915, "grad_norm": 0.5710422992706299, "learning_rate": 5e-05, "loss": 0.2098, "step": 2895 }, { "epoch": 0.9042229334165951, "grad_norm": 0.5940604209899902, "learning_rate": 5e-05, "loss": 0.1969, "step": 2896 }, { "epoch": 0.9045351650924986, "grad_norm": 0.5846580862998962, "learning_rate": 5e-05, "loss": 0.2053, "step": 2897 }, { "epoch": 0.9048473967684022, "grad_norm": 0.6120300889015198, "learning_rate": 5e-05, "loss": 0.2225, "step": 2898 }, { "epoch": 0.9051596284443056, "grad_norm": 0.586704432964325, "learning_rate": 5e-05, "loss": 0.2241, "step": 2899 }, { "epoch": 0.9054718601202092, "grad_norm": 0.6375415921211243, "learning_rate": 5e-05, "loss": 0.2364, "step": 2900 }, { "epoch": 0.9057840917961127, "grad_norm": 0.5946367383003235, "learning_rate": 5e-05, "loss": 0.2184, "step": 2901 }, { "epoch": 0.9060963234720162, "grad_norm": 0.5921390652656555, "learning_rate": 5e-05, "loss": 0.2038, "step": 2902 }, { "epoch": 0.9064085551479197, "grad_norm": 0.5903488397598267, "learning_rate": 5e-05, "loss": 0.201, "step": 2903 }, { "epoch": 0.9067207868238233, "grad_norm": 0.6023860573768616, "learning_rate": 5e-05, "loss": 0.2194, "step": 2904 }, { "epoch": 0.9070330184997268, "grad_norm": 0.5894209742546082, "learning_rate": 5e-05, "loss": 0.2061, "step": 2905 }, { "epoch": 0.9073452501756303, "grad_norm": 0.6357865333557129, "learning_rate": 5e-05, "loss": 0.2308, "step": 2906 }, { "epoch": 0.9076574818515338, "grad_norm": 0.607262372970581, "learning_rate": 5e-05, "loss": 0.2318, "step": 2907 }, { "epoch": 0.9079697135274374, "grad_norm": 0.6146484017372131, "learning_rate": 5e-05, "loss": 0.2149, "step": 2908 }, { "epoch": 0.9082819452033408, "grad_norm": 0.6206984519958496, "learning_rate": 5e-05, "loss": 0.2289, "step": 2909 }, { "epoch": 0.9085941768792444, "grad_norm": 0.5976563096046448, "learning_rate": 5e-05, "loss": 0.208, "step": 2910 }, { "epoch": 0.9089064085551479, "grad_norm": 0.6119546890258789, "learning_rate": 5e-05, "loss": 0.2175, "step": 2911 }, { "epoch": 0.9092186402310515, "grad_norm": 0.6106016039848328, "learning_rate": 5e-05, "loss": 0.2155, "step": 2912 }, { "epoch": 0.9095308719069549, "grad_norm": 0.6711645126342773, "learning_rate": 5e-05, "loss": 0.2192, "step": 2913 }, { "epoch": 0.9098431035828585, "grad_norm": 0.6048041582107544, "learning_rate": 5e-05, "loss": 0.2062, "step": 2914 }, { "epoch": 0.910155335258762, "grad_norm": 0.6133817434310913, "learning_rate": 5e-05, "loss": 0.2214, "step": 2915 }, { "epoch": 0.9104675669346656, "grad_norm": 0.6032976508140564, "learning_rate": 5e-05, "loss": 0.217, "step": 2916 }, { "epoch": 0.910779798610569, "grad_norm": 0.6290953159332275, "learning_rate": 5e-05, "loss": 0.2076, "step": 2917 }, { "epoch": 0.9110920302864726, "grad_norm": 0.6346983909606934, "learning_rate": 5e-05, "loss": 0.2247, "step": 2918 }, { "epoch": 0.9114042619623761, "grad_norm": 0.6103678345680237, "learning_rate": 5e-05, "loss": 0.2061, "step": 2919 }, { "epoch": 0.9117164936382796, "grad_norm": 0.6399035453796387, "learning_rate": 5e-05, "loss": 0.2234, "step": 2920 }, { "epoch": 0.9120287253141831, "grad_norm": 0.5787782073020935, "learning_rate": 5e-05, "loss": 0.2103, "step": 2921 }, { "epoch": 0.9123409569900867, "grad_norm": 0.5649815201759338, "learning_rate": 5e-05, "loss": 0.1883, "step": 2922 }, { "epoch": 0.9126531886659902, "grad_norm": 0.5484577417373657, "learning_rate": 5e-05, "loss": 0.1957, "step": 2923 }, { "epoch": 0.9129654203418937, "grad_norm": 0.6138188242912292, "learning_rate": 5e-05, "loss": 0.2188, "step": 2924 }, { "epoch": 0.9132776520177972, "grad_norm": 0.5764594674110413, "learning_rate": 5e-05, "loss": 0.2056, "step": 2925 }, { "epoch": 0.9135898836937008, "grad_norm": 0.5941284894943237, "learning_rate": 5e-05, "loss": 0.2202, "step": 2926 }, { "epoch": 0.9139021153696042, "grad_norm": 0.6245608925819397, "learning_rate": 5e-05, "loss": 0.23, "step": 2927 }, { "epoch": 0.9142143470455077, "grad_norm": 0.6020727157592773, "learning_rate": 5e-05, "loss": 0.1939, "step": 2928 }, { "epoch": 0.9145265787214113, "grad_norm": 0.6122434735298157, "learning_rate": 5e-05, "loss": 0.2087, "step": 2929 }, { "epoch": 0.9148388103973148, "grad_norm": 0.5791457295417786, "learning_rate": 5e-05, "loss": 0.2006, "step": 2930 }, { "epoch": 0.9151510420732183, "grad_norm": 0.6187444925308228, "learning_rate": 5e-05, "loss": 0.2188, "step": 2931 }, { "epoch": 0.9154632737491218, "grad_norm": 0.5970437526702881, "learning_rate": 5e-05, "loss": 0.2134, "step": 2932 }, { "epoch": 0.9157755054250254, "grad_norm": 0.6327024698257446, "learning_rate": 5e-05, "loss": 0.209, "step": 2933 }, { "epoch": 0.9160877371009288, "grad_norm": 0.5587921142578125, "learning_rate": 5e-05, "loss": 0.1978, "step": 2934 }, { "epoch": 0.9163999687768324, "grad_norm": 0.6047586798667908, "learning_rate": 5e-05, "loss": 0.2264, "step": 2935 }, { "epoch": 0.9167122004527359, "grad_norm": 0.5822489857673645, "learning_rate": 5e-05, "loss": 0.1957, "step": 2936 }, { "epoch": 0.9170244321286395, "grad_norm": 0.5772297978401184, "learning_rate": 5e-05, "loss": 0.2106, "step": 2937 }, { "epoch": 0.9173366638045429, "grad_norm": 0.6145853400230408, "learning_rate": 5e-05, "loss": 0.2142, "step": 2938 }, { "epoch": 0.9176488954804465, "grad_norm": 0.6257732510566711, "learning_rate": 5e-05, "loss": 0.2212, "step": 2939 }, { "epoch": 0.91796112715635, "grad_norm": 0.6079660058021545, "learning_rate": 5e-05, "loss": 0.2233, "step": 2940 }, { "epoch": 0.9182733588322536, "grad_norm": 0.5596693754196167, "learning_rate": 5e-05, "loss": 0.2123, "step": 2941 }, { "epoch": 0.918585590508157, "grad_norm": 0.5879486799240112, "learning_rate": 5e-05, "loss": 0.2061, "step": 2942 }, { "epoch": 0.9188978221840606, "grad_norm": 0.602123498916626, "learning_rate": 5e-05, "loss": 0.2044, "step": 2943 }, { "epoch": 0.9192100538599641, "grad_norm": 0.6028972268104553, "learning_rate": 5e-05, "loss": 0.2147, "step": 2944 }, { "epoch": 0.9195222855358676, "grad_norm": 0.6642829179763794, "learning_rate": 5e-05, "loss": 0.2445, "step": 2945 }, { "epoch": 0.9198345172117711, "grad_norm": 0.5882232785224915, "learning_rate": 5e-05, "loss": 0.2003, "step": 2946 }, { "epoch": 0.9201467488876747, "grad_norm": 0.5980508327484131, "learning_rate": 5e-05, "loss": 0.2218, "step": 2947 }, { "epoch": 0.9204589805635782, "grad_norm": 0.6293284296989441, "learning_rate": 5e-05, "loss": 0.2251, "step": 2948 }, { "epoch": 0.9207712122394817, "grad_norm": 0.6131969094276428, "learning_rate": 5e-05, "loss": 0.2245, "step": 2949 }, { "epoch": 0.9210834439153852, "grad_norm": 0.6435120701789856, "learning_rate": 5e-05, "loss": 0.2167, "step": 2950 }, { "epoch": 0.9213956755912888, "grad_norm": 0.6167629957199097, "learning_rate": 5e-05, "loss": 0.2261, "step": 2951 }, { "epoch": 0.9217079072671922, "grad_norm": 0.6086641550064087, "learning_rate": 5e-05, "loss": 0.2234, "step": 2952 }, { "epoch": 0.9220201389430958, "grad_norm": 0.623602569103241, "learning_rate": 5e-05, "loss": 0.2161, "step": 2953 }, { "epoch": 0.9223323706189993, "grad_norm": 0.5722328424453735, "learning_rate": 5e-05, "loss": 0.2118, "step": 2954 }, { "epoch": 0.9226446022949029, "grad_norm": 0.5689215660095215, "learning_rate": 5e-05, "loss": 0.1993, "step": 2955 }, { "epoch": 0.9229568339708063, "grad_norm": 0.6047724485397339, "learning_rate": 5e-05, "loss": 0.2132, "step": 2956 }, { "epoch": 0.9232690656467099, "grad_norm": 0.5515403151512146, "learning_rate": 5e-05, "loss": 0.207, "step": 2957 }, { "epoch": 0.9235812973226134, "grad_norm": 0.5966963768005371, "learning_rate": 5e-05, "loss": 0.2337, "step": 2958 }, { "epoch": 0.9238935289985168, "grad_norm": 0.5788060426712036, "learning_rate": 5e-05, "loss": 0.2091, "step": 2959 }, { "epoch": 0.9242057606744204, "grad_norm": 0.5880439281463623, "learning_rate": 5e-05, "loss": 0.2166, "step": 2960 }, { "epoch": 0.924517992350324, "grad_norm": 0.6107873320579529, "learning_rate": 5e-05, "loss": 0.2193, "step": 2961 }, { "epoch": 0.9248302240262275, "grad_norm": 0.6116974353790283, "learning_rate": 5e-05, "loss": 0.2402, "step": 2962 }, { "epoch": 0.9251424557021309, "grad_norm": 0.579048752784729, "learning_rate": 5e-05, "loss": 0.2111, "step": 2963 }, { "epoch": 0.9254546873780345, "grad_norm": 0.5737884640693665, "learning_rate": 5e-05, "loss": 0.1946, "step": 2964 }, { "epoch": 0.925766919053938, "grad_norm": 0.5633342862129211, "learning_rate": 5e-05, "loss": 0.2042, "step": 2965 }, { "epoch": 0.9260791507298416, "grad_norm": 0.5769203901290894, "learning_rate": 5e-05, "loss": 0.2043, "step": 2966 }, { "epoch": 0.926391382405745, "grad_norm": 0.6033095121383667, "learning_rate": 5e-05, "loss": 0.203, "step": 2967 }, { "epoch": 0.9267036140816486, "grad_norm": 0.5681781768798828, "learning_rate": 5e-05, "loss": 0.2032, "step": 2968 }, { "epoch": 0.9270158457575521, "grad_norm": 0.5862268805503845, "learning_rate": 5e-05, "loss": 0.2095, "step": 2969 }, { "epoch": 0.9273280774334556, "grad_norm": 0.6139093041419983, "learning_rate": 5e-05, "loss": 0.2162, "step": 2970 }, { "epoch": 0.9276403091093591, "grad_norm": 0.6138486266136169, "learning_rate": 5e-05, "loss": 0.2179, "step": 2971 }, { "epoch": 0.9279525407852627, "grad_norm": 0.5932419300079346, "learning_rate": 5e-05, "loss": 0.1989, "step": 2972 }, { "epoch": 0.9282647724611662, "grad_norm": 0.6433693170547485, "learning_rate": 5e-05, "loss": 0.2278, "step": 2973 }, { "epoch": 0.9285770041370697, "grad_norm": 0.5810719728469849, "learning_rate": 5e-05, "loss": 0.2156, "step": 2974 }, { "epoch": 0.9288892358129732, "grad_norm": 0.5837427377700806, "learning_rate": 5e-05, "loss": 0.2209, "step": 2975 }, { "epoch": 0.9292014674888768, "grad_norm": 0.5627228617668152, "learning_rate": 5e-05, "loss": 0.1927, "step": 2976 }, { "epoch": 0.9295136991647802, "grad_norm": 0.6194053292274475, "learning_rate": 5e-05, "loss": 0.2268, "step": 2977 }, { "epoch": 0.9298259308406838, "grad_norm": 0.6363211274147034, "learning_rate": 5e-05, "loss": 0.2255, "step": 2978 }, { "epoch": 0.9301381625165873, "grad_norm": 0.582333505153656, "learning_rate": 5e-05, "loss": 0.2176, "step": 2979 }, { "epoch": 0.9304503941924909, "grad_norm": 0.5923243761062622, "learning_rate": 5e-05, "loss": 0.2173, "step": 2980 }, { "epoch": 0.9307626258683943, "grad_norm": 0.6357342600822449, "learning_rate": 5e-05, "loss": 0.2219, "step": 2981 }, { "epoch": 0.9310748575442979, "grad_norm": 0.5638853311538696, "learning_rate": 5e-05, "loss": 0.1982, "step": 2982 }, { "epoch": 0.9313870892202014, "grad_norm": 0.6088641881942749, "learning_rate": 5e-05, "loss": 0.2125, "step": 2983 }, { "epoch": 0.9316993208961049, "grad_norm": 0.5369557738304138, "learning_rate": 5e-05, "loss": 0.2019, "step": 2984 }, { "epoch": 0.9320115525720084, "grad_norm": 0.5835304260253906, "learning_rate": 5e-05, "loss": 0.1969, "step": 2985 }, { "epoch": 0.932323784247912, "grad_norm": 0.6053053736686707, "learning_rate": 5e-05, "loss": 0.2122, "step": 2986 }, { "epoch": 0.9326360159238155, "grad_norm": 0.5907647609710693, "learning_rate": 5e-05, "loss": 0.2216, "step": 2987 }, { "epoch": 0.932948247599719, "grad_norm": 0.5944868326187134, "learning_rate": 5e-05, "loss": 0.2105, "step": 2988 }, { "epoch": 0.9332604792756225, "grad_norm": 0.592362105846405, "learning_rate": 5e-05, "loss": 0.1967, "step": 2989 }, { "epoch": 0.933572710951526, "grad_norm": 0.5819444060325623, "learning_rate": 5e-05, "loss": 0.2049, "step": 2990 }, { "epoch": 0.9338849426274296, "grad_norm": 0.6297182440757751, "learning_rate": 5e-05, "loss": 0.2478, "step": 2991 }, { "epoch": 0.934197174303333, "grad_norm": 0.6007236838340759, "learning_rate": 5e-05, "loss": 0.2055, "step": 2992 }, { "epoch": 0.9345094059792366, "grad_norm": 0.5693680047988892, "learning_rate": 5e-05, "loss": 0.2191, "step": 2993 }, { "epoch": 0.9348216376551401, "grad_norm": 0.6239158511161804, "learning_rate": 5e-05, "loss": 0.235, "step": 2994 }, { "epoch": 0.9351338693310436, "grad_norm": 0.5964023470878601, "learning_rate": 5e-05, "loss": 0.2263, "step": 2995 }, { "epoch": 0.9354461010069471, "grad_norm": 0.5735844969749451, "learning_rate": 5e-05, "loss": 0.1967, "step": 2996 }, { "epoch": 0.9357583326828507, "grad_norm": 0.6049012541770935, "learning_rate": 5e-05, "loss": 0.2213, "step": 2997 }, { "epoch": 0.9360705643587542, "grad_norm": 0.571596086025238, "learning_rate": 5e-05, "loss": 0.2166, "step": 2998 }, { "epoch": 0.9363827960346577, "grad_norm": 0.6082130670547485, "learning_rate": 5e-05, "loss": 0.2257, "step": 2999 }, { "epoch": 0.9366950277105612, "grad_norm": 0.61019366979599, "learning_rate": 5e-05, "loss": 0.2045, "step": 3000 }, { "epoch": 0.9370072593864648, "grad_norm": 0.5599969625473022, "learning_rate": 5e-05, "loss": 0.2187, "step": 3001 }, { "epoch": 0.9373194910623682, "grad_norm": 0.5753084421157837, "learning_rate": 5e-05, "loss": 0.2045, "step": 3002 }, { "epoch": 0.9376317227382718, "grad_norm": 0.5870047211647034, "learning_rate": 5e-05, "loss": 0.2107, "step": 3003 }, { "epoch": 0.9379439544141753, "grad_norm": 0.6168680191040039, "learning_rate": 5e-05, "loss": 0.2282, "step": 3004 }, { "epoch": 0.9382561860900789, "grad_norm": 0.5832163095474243, "learning_rate": 5e-05, "loss": 0.2051, "step": 3005 }, { "epoch": 0.9385684177659823, "grad_norm": 0.5970110297203064, "learning_rate": 5e-05, "loss": 0.2278, "step": 3006 }, { "epoch": 0.9388806494418859, "grad_norm": 0.5832142233848572, "learning_rate": 5e-05, "loss": 0.2211, "step": 3007 }, { "epoch": 0.9391928811177894, "grad_norm": 0.605360209941864, "learning_rate": 5e-05, "loss": 0.2216, "step": 3008 }, { "epoch": 0.9395051127936929, "grad_norm": 0.6042463183403015, "learning_rate": 5e-05, "loss": 0.2148, "step": 3009 }, { "epoch": 0.9398173444695964, "grad_norm": 0.5937478542327881, "learning_rate": 5e-05, "loss": 0.2212, "step": 3010 }, { "epoch": 0.9401295761455, "grad_norm": 0.6284675598144531, "learning_rate": 5e-05, "loss": 0.2125, "step": 3011 }, { "epoch": 0.9404418078214035, "grad_norm": 0.6081011295318604, "learning_rate": 5e-05, "loss": 0.2078, "step": 3012 }, { "epoch": 0.940754039497307, "grad_norm": 0.5779363512992859, "learning_rate": 5e-05, "loss": 0.2064, "step": 3013 }, { "epoch": 0.9410662711732105, "grad_norm": 0.5917549729347229, "learning_rate": 5e-05, "loss": 0.21, "step": 3014 }, { "epoch": 0.9413785028491141, "grad_norm": 0.5919544696807861, "learning_rate": 5e-05, "loss": 0.2033, "step": 3015 }, { "epoch": 0.9416907345250176, "grad_norm": 0.6231817007064819, "learning_rate": 5e-05, "loss": 0.2344, "step": 3016 }, { "epoch": 0.942002966200921, "grad_norm": 0.5989400148391724, "learning_rate": 5e-05, "loss": 0.2108, "step": 3017 }, { "epoch": 0.9423151978768246, "grad_norm": 0.5549107789993286, "learning_rate": 5e-05, "loss": 0.2007, "step": 3018 }, { "epoch": 0.9426274295527282, "grad_norm": 0.5967212319374084, "learning_rate": 5e-05, "loss": 0.2156, "step": 3019 }, { "epoch": 0.9429396612286316, "grad_norm": 0.6029136776924133, "learning_rate": 5e-05, "loss": 0.2124, "step": 3020 }, { "epoch": 0.9432518929045351, "grad_norm": 0.5722916722297668, "learning_rate": 5e-05, "loss": 0.2105, "step": 3021 }, { "epoch": 0.9435641245804387, "grad_norm": 0.5421395897865295, "learning_rate": 5e-05, "loss": 0.1928, "step": 3022 }, { "epoch": 0.9438763562563423, "grad_norm": 0.6167170405387878, "learning_rate": 5e-05, "loss": 0.2261, "step": 3023 }, { "epoch": 0.9441885879322457, "grad_norm": 0.556503415107727, "learning_rate": 5e-05, "loss": 0.1913, "step": 3024 }, { "epoch": 0.9445008196081492, "grad_norm": 0.5876614451408386, "learning_rate": 5e-05, "loss": 0.2204, "step": 3025 }, { "epoch": 0.9448130512840528, "grad_norm": 0.5984711647033691, "learning_rate": 5e-05, "loss": 0.2146, "step": 3026 }, { "epoch": 0.9451252829599562, "grad_norm": 0.6067750453948975, "learning_rate": 5e-05, "loss": 0.2321, "step": 3027 }, { "epoch": 0.9454375146358598, "grad_norm": 0.5834141373634338, "learning_rate": 5e-05, "loss": 0.209, "step": 3028 }, { "epoch": 0.9457497463117633, "grad_norm": 0.6017066836357117, "learning_rate": 5e-05, "loss": 0.214, "step": 3029 }, { "epoch": 0.9460619779876669, "grad_norm": 0.6181015968322754, "learning_rate": 5e-05, "loss": 0.2314, "step": 3030 }, { "epoch": 0.9463742096635703, "grad_norm": 0.6072700619697571, "learning_rate": 5e-05, "loss": 0.2142, "step": 3031 }, { "epoch": 0.9466864413394739, "grad_norm": 0.5733376741409302, "learning_rate": 5e-05, "loss": 0.2111, "step": 3032 }, { "epoch": 0.9469986730153774, "grad_norm": 0.601874828338623, "learning_rate": 5e-05, "loss": 0.2101, "step": 3033 }, { "epoch": 0.9473109046912809, "grad_norm": 0.5839487314224243, "learning_rate": 5e-05, "loss": 0.2023, "step": 3034 }, { "epoch": 0.9476231363671844, "grad_norm": 0.5967411398887634, "learning_rate": 5e-05, "loss": 0.2098, "step": 3035 }, { "epoch": 0.947935368043088, "grad_norm": 0.6038748025894165, "learning_rate": 5e-05, "loss": 0.2127, "step": 3036 }, { "epoch": 0.9482475997189915, "grad_norm": 0.5989351868629456, "learning_rate": 5e-05, "loss": 0.1959, "step": 3037 }, { "epoch": 0.948559831394895, "grad_norm": 0.6095969676971436, "learning_rate": 5e-05, "loss": 0.2005, "step": 3038 }, { "epoch": 0.9488720630707985, "grad_norm": 0.6260789036750793, "learning_rate": 5e-05, "loss": 0.2276, "step": 3039 }, { "epoch": 0.9491842947467021, "grad_norm": 0.6474996209144592, "learning_rate": 5e-05, "loss": 0.2328, "step": 3040 }, { "epoch": 0.9494965264226056, "grad_norm": 0.6425648331642151, "learning_rate": 5e-05, "loss": 0.2291, "step": 3041 }, { "epoch": 0.9498087580985091, "grad_norm": 0.5772847533226013, "learning_rate": 5e-05, "loss": 0.1979, "step": 3042 }, { "epoch": 0.9501209897744126, "grad_norm": 0.6314143538475037, "learning_rate": 5e-05, "loss": 0.2254, "step": 3043 }, { "epoch": 0.9504332214503162, "grad_norm": 0.594447135925293, "learning_rate": 5e-05, "loss": 0.2042, "step": 3044 }, { "epoch": 0.9507454531262196, "grad_norm": 0.5859397053718567, "learning_rate": 5e-05, "loss": 0.2122, "step": 3045 }, { "epoch": 0.9510576848021232, "grad_norm": 0.6022169589996338, "learning_rate": 5e-05, "loss": 0.2243, "step": 3046 }, { "epoch": 0.9513699164780267, "grad_norm": 0.5813048481941223, "learning_rate": 5e-05, "loss": 0.2165, "step": 3047 }, { "epoch": 0.9516821481539303, "grad_norm": 0.5868440270423889, "learning_rate": 5e-05, "loss": 0.2054, "step": 3048 }, { "epoch": 0.9519943798298337, "grad_norm": 0.5803433656692505, "learning_rate": 5e-05, "loss": 0.2199, "step": 3049 }, { "epoch": 0.9523066115057373, "grad_norm": 0.5616099834442139, "learning_rate": 5e-05, "loss": 0.2145, "step": 3050 }, { "epoch": 0.9526188431816408, "grad_norm": 0.5670779347419739, "learning_rate": 5e-05, "loss": 0.2011, "step": 3051 }, { "epoch": 0.9529310748575442, "grad_norm": 0.5409479737281799, "learning_rate": 5e-05, "loss": 0.1889, "step": 3052 }, { "epoch": 0.9532433065334478, "grad_norm": 0.6353897452354431, "learning_rate": 5e-05, "loss": 0.2231, "step": 3053 }, { "epoch": 0.9535555382093513, "grad_norm": 0.5759361982345581, "learning_rate": 5e-05, "loss": 0.2073, "step": 3054 }, { "epoch": 0.9538677698852549, "grad_norm": 0.5792763233184814, "learning_rate": 5e-05, "loss": 0.2142, "step": 3055 }, { "epoch": 0.9541800015611583, "grad_norm": 0.5798017978668213, "learning_rate": 5e-05, "loss": 0.205, "step": 3056 }, { "epoch": 0.9544922332370619, "grad_norm": 0.62482088804245, "learning_rate": 5e-05, "loss": 0.2272, "step": 3057 }, { "epoch": 0.9548044649129654, "grad_norm": 0.6302100419998169, "learning_rate": 5e-05, "loss": 0.2169, "step": 3058 }, { "epoch": 0.9551166965888689, "grad_norm": 0.6818423271179199, "learning_rate": 5e-05, "loss": 0.2305, "step": 3059 }, { "epoch": 0.9554289282647724, "grad_norm": 0.633567750453949, "learning_rate": 5e-05, "loss": 0.2308, "step": 3060 }, { "epoch": 0.955741159940676, "grad_norm": 0.5796268582344055, "learning_rate": 5e-05, "loss": 0.2021, "step": 3061 }, { "epoch": 0.9560533916165795, "grad_norm": 0.6004248261451721, "learning_rate": 5e-05, "loss": 0.226, "step": 3062 }, { "epoch": 0.956365623292483, "grad_norm": 0.5765902400016785, "learning_rate": 5e-05, "loss": 0.1953, "step": 3063 }, { "epoch": 0.9566778549683865, "grad_norm": 0.5843938589096069, "learning_rate": 5e-05, "loss": 0.213, "step": 3064 }, { "epoch": 0.9569900866442901, "grad_norm": 0.58317631483078, "learning_rate": 5e-05, "loss": 0.2082, "step": 3065 }, { "epoch": 0.9573023183201936, "grad_norm": 0.5718657970428467, "learning_rate": 5e-05, "loss": 0.21, "step": 3066 }, { "epoch": 0.9576145499960971, "grad_norm": 0.6368811726570129, "learning_rate": 5e-05, "loss": 0.2371, "step": 3067 }, { "epoch": 0.9579267816720006, "grad_norm": 0.6404739618301392, "learning_rate": 5e-05, "loss": 0.2373, "step": 3068 }, { "epoch": 0.9582390133479042, "grad_norm": 0.5737653970718384, "learning_rate": 5e-05, "loss": 0.2234, "step": 3069 }, { "epoch": 0.9585512450238076, "grad_norm": 0.5952857136726379, "learning_rate": 5e-05, "loss": 0.2098, "step": 3070 }, { "epoch": 0.9588634766997112, "grad_norm": 0.5931748747825623, "learning_rate": 5e-05, "loss": 0.2136, "step": 3071 }, { "epoch": 0.9591757083756147, "grad_norm": 0.6056362986564636, "learning_rate": 5e-05, "loss": 0.213, "step": 3072 }, { "epoch": 0.9594879400515183, "grad_norm": 0.5927523970603943, "learning_rate": 5e-05, "loss": 0.2068, "step": 3073 }, { "epoch": 0.9598001717274217, "grad_norm": 0.6395705938339233, "learning_rate": 5e-05, "loss": 0.2314, "step": 3074 }, { "epoch": 0.9601124034033253, "grad_norm": 0.6063176393508911, "learning_rate": 5e-05, "loss": 0.211, "step": 3075 }, { "epoch": 0.9604246350792288, "grad_norm": 0.594310462474823, "learning_rate": 5e-05, "loss": 0.2061, "step": 3076 }, { "epoch": 0.9607368667551323, "grad_norm": 0.5938969254493713, "learning_rate": 5e-05, "loss": 0.1964, "step": 3077 }, { "epoch": 0.9610490984310358, "grad_norm": 0.6116241812705994, "learning_rate": 5e-05, "loss": 0.2246, "step": 3078 }, { "epoch": 0.9613613301069394, "grad_norm": 0.5974786281585693, "learning_rate": 5e-05, "loss": 0.2282, "step": 3079 }, { "epoch": 0.9616735617828429, "grad_norm": 0.5845942497253418, "learning_rate": 5e-05, "loss": 0.2198, "step": 3080 }, { "epoch": 0.9619857934587464, "grad_norm": 0.6111735701560974, "learning_rate": 5e-05, "loss": 0.2229, "step": 3081 }, { "epoch": 0.9622980251346499, "grad_norm": 0.5908454656600952, "learning_rate": 5e-05, "loss": 0.2049, "step": 3082 }, { "epoch": 0.9626102568105535, "grad_norm": 0.5797688364982605, "learning_rate": 5e-05, "loss": 0.2066, "step": 3083 }, { "epoch": 0.9629224884864569, "grad_norm": 0.6081740260124207, "learning_rate": 5e-05, "loss": 0.2278, "step": 3084 }, { "epoch": 0.9632347201623604, "grad_norm": 0.5721418857574463, "learning_rate": 5e-05, "loss": 0.2105, "step": 3085 }, { "epoch": 0.963546951838264, "grad_norm": 0.5468023419380188, "learning_rate": 5e-05, "loss": 0.2003, "step": 3086 }, { "epoch": 0.9638591835141676, "grad_norm": 0.6317862868309021, "learning_rate": 5e-05, "loss": 0.214, "step": 3087 }, { "epoch": 0.964171415190071, "grad_norm": 0.6359354257583618, "learning_rate": 5e-05, "loss": 0.2148, "step": 3088 }, { "epoch": 0.9644836468659745, "grad_norm": 0.5903233289718628, "learning_rate": 5e-05, "loss": 0.2154, "step": 3089 }, { "epoch": 0.9647958785418781, "grad_norm": 0.5783306360244751, "learning_rate": 5e-05, "loss": 0.209, "step": 3090 }, { "epoch": 0.9651081102177816, "grad_norm": 0.6185778975486755, "learning_rate": 5e-05, "loss": 0.2106, "step": 3091 }, { "epoch": 0.9654203418936851, "grad_norm": 0.5768870711326599, "learning_rate": 5e-05, "loss": 0.1953, "step": 3092 }, { "epoch": 0.9657325735695886, "grad_norm": 0.5665168762207031, "learning_rate": 5e-05, "loss": 0.1903, "step": 3093 }, { "epoch": 0.9660448052454922, "grad_norm": 0.639580249786377, "learning_rate": 5e-05, "loss": 0.2253, "step": 3094 }, { "epoch": 0.9663570369213956, "grad_norm": 0.6010136008262634, "learning_rate": 5e-05, "loss": 0.2199, "step": 3095 }, { "epoch": 0.9666692685972992, "grad_norm": 0.5879684090614319, "learning_rate": 5e-05, "loss": 0.2062, "step": 3096 }, { "epoch": 0.9669815002732027, "grad_norm": 0.6152652502059937, "learning_rate": 5e-05, "loss": 0.2226, "step": 3097 }, { "epoch": 0.9672937319491063, "grad_norm": 0.57731693983078, "learning_rate": 5e-05, "loss": 0.2074, "step": 3098 }, { "epoch": 0.9676059636250097, "grad_norm": 0.6041362881660461, "learning_rate": 5e-05, "loss": 0.2053, "step": 3099 }, { "epoch": 0.9679181953009133, "grad_norm": 0.5791060924530029, "learning_rate": 5e-05, "loss": 0.2101, "step": 3100 }, { "epoch": 0.9682304269768168, "grad_norm": 0.5784593820571899, "learning_rate": 5e-05, "loss": 0.2073, "step": 3101 }, { "epoch": 0.9685426586527203, "grad_norm": 0.6100658774375916, "learning_rate": 5e-05, "loss": 0.2131, "step": 3102 }, { "epoch": 0.9688548903286238, "grad_norm": 0.5894810557365417, "learning_rate": 5e-05, "loss": 0.2045, "step": 3103 }, { "epoch": 0.9691671220045274, "grad_norm": 0.5811233520507812, "learning_rate": 5e-05, "loss": 0.2111, "step": 3104 }, { "epoch": 0.9694793536804309, "grad_norm": 0.5762295126914978, "learning_rate": 5e-05, "loss": 0.2107, "step": 3105 }, { "epoch": 0.9697915853563344, "grad_norm": 0.6365753412246704, "learning_rate": 5e-05, "loss": 0.2278, "step": 3106 }, { "epoch": 0.9701038170322379, "grad_norm": 0.5693429112434387, "learning_rate": 5e-05, "loss": 0.2004, "step": 3107 }, { "epoch": 0.9704160487081415, "grad_norm": 0.5870285630226135, "learning_rate": 5e-05, "loss": 0.2047, "step": 3108 }, { "epoch": 0.9707282803840449, "grad_norm": 0.5743515491485596, "learning_rate": 5e-05, "loss": 0.1991, "step": 3109 }, { "epoch": 0.9710405120599485, "grad_norm": 0.6164810061454773, "learning_rate": 5e-05, "loss": 0.2298, "step": 3110 }, { "epoch": 0.971352743735852, "grad_norm": 0.5748583078384399, "learning_rate": 5e-05, "loss": 0.2045, "step": 3111 }, { "epoch": 0.9716649754117556, "grad_norm": 0.6021985411643982, "learning_rate": 5e-05, "loss": 0.2083, "step": 3112 }, { "epoch": 0.971977207087659, "grad_norm": 0.5979009866714478, "learning_rate": 5e-05, "loss": 0.1926, "step": 3113 }, { "epoch": 0.9722894387635626, "grad_norm": 0.6172654628753662, "learning_rate": 5e-05, "loss": 0.2057, "step": 3114 }, { "epoch": 0.9726016704394661, "grad_norm": 0.5756515264511108, "learning_rate": 5e-05, "loss": 0.2185, "step": 3115 }, { "epoch": 0.9729139021153697, "grad_norm": 0.5934594869613647, "learning_rate": 5e-05, "loss": 0.2071, "step": 3116 }, { "epoch": 0.9732261337912731, "grad_norm": 0.6005604267120361, "learning_rate": 5e-05, "loss": 0.2191, "step": 3117 }, { "epoch": 0.9735383654671766, "grad_norm": 0.6058856248855591, "learning_rate": 5e-05, "loss": 0.2042, "step": 3118 }, { "epoch": 0.9738505971430802, "grad_norm": 0.5904420018196106, "learning_rate": 5e-05, "loss": 0.2051, "step": 3119 }, { "epoch": 0.9741628288189836, "grad_norm": 0.6150959134101868, "learning_rate": 5e-05, "loss": 0.2054, "step": 3120 }, { "epoch": 0.9744750604948872, "grad_norm": 0.6258102059364319, "learning_rate": 5e-05, "loss": 0.225, "step": 3121 }, { "epoch": 0.9747872921707907, "grad_norm": 0.5946921110153198, "learning_rate": 5e-05, "loss": 0.204, "step": 3122 }, { "epoch": 0.9750995238466943, "grad_norm": 0.6079012751579285, "learning_rate": 5e-05, "loss": 0.226, "step": 3123 }, { "epoch": 0.9754117555225977, "grad_norm": 0.5976081490516663, "learning_rate": 5e-05, "loss": 0.2141, "step": 3124 }, { "epoch": 0.9757239871985013, "grad_norm": 0.5866429805755615, "learning_rate": 5e-05, "loss": 0.2106, "step": 3125 }, { "epoch": 0.9760362188744048, "grad_norm": 0.5867417454719543, "learning_rate": 5e-05, "loss": 0.2045, "step": 3126 }, { "epoch": 0.9763484505503083, "grad_norm": 0.5776996612548828, "learning_rate": 5e-05, "loss": 0.1999, "step": 3127 }, { "epoch": 0.9766606822262118, "grad_norm": 0.605657696723938, "learning_rate": 5e-05, "loss": 0.2222, "step": 3128 }, { "epoch": 0.9769729139021154, "grad_norm": 0.6900262832641602, "learning_rate": 5e-05, "loss": 0.2185, "step": 3129 }, { "epoch": 0.9772851455780189, "grad_norm": 0.5859143733978271, "learning_rate": 5e-05, "loss": 0.2095, "step": 3130 }, { "epoch": 0.9775973772539224, "grad_norm": 0.6148985624313354, "learning_rate": 5e-05, "loss": 0.2016, "step": 3131 }, { "epoch": 0.9779096089298259, "grad_norm": 0.594366729259491, "learning_rate": 5e-05, "loss": 0.2117, "step": 3132 }, { "epoch": 0.9782218406057295, "grad_norm": 0.5940199494361877, "learning_rate": 5e-05, "loss": 0.2059, "step": 3133 }, { "epoch": 0.9785340722816329, "grad_norm": 0.6239863634109497, "learning_rate": 5e-05, "loss": 0.212, "step": 3134 }, { "epoch": 0.9788463039575365, "grad_norm": 0.6029557585716248, "learning_rate": 5e-05, "loss": 0.2068, "step": 3135 }, { "epoch": 0.97915853563344, "grad_norm": 0.5946678519248962, "learning_rate": 5e-05, "loss": 0.217, "step": 3136 }, { "epoch": 0.9794707673093436, "grad_norm": 0.5834630131721497, "learning_rate": 5e-05, "loss": 0.2115, "step": 3137 }, { "epoch": 0.979782998985247, "grad_norm": 0.5946319699287415, "learning_rate": 5e-05, "loss": 0.2143, "step": 3138 }, { "epoch": 0.9800952306611506, "grad_norm": 0.5826728940010071, "learning_rate": 5e-05, "loss": 0.1996, "step": 3139 }, { "epoch": 0.9804074623370541, "grad_norm": 0.6149685978889465, "learning_rate": 5e-05, "loss": 0.21, "step": 3140 }, { "epoch": 0.9807196940129577, "grad_norm": 0.5994616746902466, "learning_rate": 5e-05, "loss": 0.2214, "step": 3141 }, { "epoch": 0.9810319256888611, "grad_norm": 0.5723775625228882, "learning_rate": 5e-05, "loss": 0.1995, "step": 3142 }, { "epoch": 0.9813441573647647, "grad_norm": 0.6200574636459351, "learning_rate": 5e-05, "loss": 0.2181, "step": 3143 }, { "epoch": 0.9816563890406682, "grad_norm": 0.5785886645317078, "learning_rate": 5e-05, "loss": 0.2, "step": 3144 }, { "epoch": 0.9819686207165716, "grad_norm": 0.5970657467842102, "learning_rate": 5e-05, "loss": 0.2087, "step": 3145 }, { "epoch": 0.9822808523924752, "grad_norm": 0.586504340171814, "learning_rate": 5e-05, "loss": 0.2086, "step": 3146 }, { "epoch": 0.9825930840683788, "grad_norm": 0.5819817185401917, "learning_rate": 5e-05, "loss": 0.1934, "step": 3147 }, { "epoch": 0.9829053157442823, "grad_norm": 0.6589837670326233, "learning_rate": 5e-05, "loss": 0.2128, "step": 3148 }, { "epoch": 0.9832175474201857, "grad_norm": 0.6399112343788147, "learning_rate": 5e-05, "loss": 0.2144, "step": 3149 }, { "epoch": 0.9835297790960893, "grad_norm": 0.6444038152694702, "learning_rate": 5e-05, "loss": 0.2176, "step": 3150 }, { "epoch": 0.9838420107719928, "grad_norm": 0.5722078084945679, "learning_rate": 5e-05, "loss": 0.2118, "step": 3151 }, { "epoch": 0.9841542424478963, "grad_norm": 0.5606260895729065, "learning_rate": 5e-05, "loss": 0.2009, "step": 3152 }, { "epoch": 0.9844664741237998, "grad_norm": 0.5784137845039368, "learning_rate": 5e-05, "loss": 0.2046, "step": 3153 }, { "epoch": 0.9847787057997034, "grad_norm": 0.5919274687767029, "learning_rate": 5e-05, "loss": 0.218, "step": 3154 }, { "epoch": 0.9850909374756069, "grad_norm": 0.5771549344062805, "learning_rate": 5e-05, "loss": 0.2024, "step": 3155 }, { "epoch": 0.9854031691515104, "grad_norm": 0.551965057849884, "learning_rate": 5e-05, "loss": 0.1989, "step": 3156 }, { "epoch": 0.9857154008274139, "grad_norm": 0.5940678715705872, "learning_rate": 5e-05, "loss": 0.2113, "step": 3157 }, { "epoch": 0.9860276325033175, "grad_norm": 0.5867573618888855, "learning_rate": 5e-05, "loss": 0.2164, "step": 3158 }, { "epoch": 0.9863398641792209, "grad_norm": 0.6553264260292053, "learning_rate": 5e-05, "loss": 0.2122, "step": 3159 }, { "epoch": 0.9866520958551245, "grad_norm": 0.5679608583450317, "learning_rate": 5e-05, "loss": 0.2082, "step": 3160 }, { "epoch": 0.986964327531028, "grad_norm": 0.5794731974601746, "learning_rate": 5e-05, "loss": 0.2056, "step": 3161 }, { "epoch": 0.9872765592069316, "grad_norm": 0.6340915560722351, "learning_rate": 5e-05, "loss": 0.2314, "step": 3162 }, { "epoch": 0.987588790882835, "grad_norm": 0.5856466889381409, "learning_rate": 5e-05, "loss": 0.2205, "step": 3163 }, { "epoch": 0.9879010225587386, "grad_norm": 0.5881797671318054, "learning_rate": 5e-05, "loss": 0.2067, "step": 3164 }, { "epoch": 0.9882132542346421, "grad_norm": 0.5959815382957458, "learning_rate": 5e-05, "loss": 0.2036, "step": 3165 }, { "epoch": 0.9885254859105457, "grad_norm": 0.5638937950134277, "learning_rate": 5e-05, "loss": 0.2038, "step": 3166 }, { "epoch": 0.9888377175864491, "grad_norm": 0.5658978223800659, "learning_rate": 5e-05, "loss": 0.2007, "step": 3167 }, { "epoch": 0.9891499492623527, "grad_norm": 0.6049035787582397, "learning_rate": 5e-05, "loss": 0.1983, "step": 3168 }, { "epoch": 0.9894621809382562, "grad_norm": 0.5913674235343933, "learning_rate": 5e-05, "loss": 0.2093, "step": 3169 }, { "epoch": 0.9897744126141597, "grad_norm": 0.6027672290802002, "learning_rate": 5e-05, "loss": 0.2039, "step": 3170 }, { "epoch": 0.9900866442900632, "grad_norm": 0.6369916796684265, "learning_rate": 5e-05, "loss": 0.2112, "step": 3171 }, { "epoch": 0.9903988759659668, "grad_norm": 0.5783952474594116, "learning_rate": 5e-05, "loss": 0.2036, "step": 3172 }, { "epoch": 0.9907111076418703, "grad_norm": 0.600162148475647, "learning_rate": 5e-05, "loss": 0.191, "step": 3173 }, { "epoch": 0.9910233393177738, "grad_norm": 0.5995122194290161, "learning_rate": 5e-05, "loss": 0.2144, "step": 3174 }, { "epoch": 0.9913355709936773, "grad_norm": 0.5753566026687622, "learning_rate": 5e-05, "loss": 0.2096, "step": 3175 }, { "epoch": 0.9916478026695809, "grad_norm": 0.5944439172744751, "learning_rate": 5e-05, "loss": 0.2121, "step": 3176 }, { "epoch": 0.9919600343454843, "grad_norm": 0.6145507097244263, "learning_rate": 5e-05, "loss": 0.2218, "step": 3177 }, { "epoch": 0.9922722660213878, "grad_norm": 0.5712412595748901, "learning_rate": 5e-05, "loss": 0.2127, "step": 3178 }, { "epoch": 0.9925844976972914, "grad_norm": 0.5999142527580261, "learning_rate": 5e-05, "loss": 0.2047, "step": 3179 }, { "epoch": 0.992896729373195, "grad_norm": 0.5596389174461365, "learning_rate": 5e-05, "loss": 0.2047, "step": 3180 }, { "epoch": 0.9932089610490984, "grad_norm": 0.5939897894859314, "learning_rate": 5e-05, "loss": 0.2105, "step": 3181 }, { "epoch": 0.993521192725002, "grad_norm": 0.5848912596702576, "learning_rate": 5e-05, "loss": 0.2016, "step": 3182 }, { "epoch": 0.9938334244009055, "grad_norm": 0.620559811592102, "learning_rate": 5e-05, "loss": 0.2142, "step": 3183 }, { "epoch": 0.9941456560768089, "grad_norm": 0.5866594314575195, "learning_rate": 5e-05, "loss": 0.2088, "step": 3184 }, { "epoch": 0.9944578877527125, "grad_norm": 0.6290944814682007, "learning_rate": 5e-05, "loss": 0.2236, "step": 3185 }, { "epoch": 0.994770119428616, "grad_norm": 0.5795429944992065, "learning_rate": 5e-05, "loss": 0.1907, "step": 3186 }, { "epoch": 0.9950823511045196, "grad_norm": 0.6115344166755676, "learning_rate": 5e-05, "loss": 0.2139, "step": 3187 }, { "epoch": 0.995394582780423, "grad_norm": 0.6015181541442871, "learning_rate": 5e-05, "loss": 0.2165, "step": 3188 }, { "epoch": 0.9957068144563266, "grad_norm": 0.6264814138412476, "learning_rate": 5e-05, "loss": 0.21, "step": 3189 }, { "epoch": 0.9960190461322301, "grad_norm": 0.5744755268096924, "learning_rate": 5e-05, "loss": 0.2105, "step": 3190 }, { "epoch": 0.9963312778081337, "grad_norm": 0.5803213119506836, "learning_rate": 5e-05, "loss": 0.2131, "step": 3191 }, { "epoch": 0.9966435094840371, "grad_norm": 0.5660994648933411, "learning_rate": 5e-05, "loss": 0.2006, "step": 3192 }, { "epoch": 0.9969557411599407, "grad_norm": 0.5874443054199219, "learning_rate": 5e-05, "loss": 0.2043, "step": 3193 }, { "epoch": 0.9972679728358442, "grad_norm": 0.611182451248169, "learning_rate": 5e-05, "loss": 0.2295, "step": 3194 }, { "epoch": 0.9975802045117477, "grad_norm": 0.6042623519897461, "learning_rate": 5e-05, "loss": 0.2191, "step": 3195 }, { "epoch": 0.9978924361876512, "grad_norm": 0.5912525653839111, "learning_rate": 5e-05, "loss": 0.2167, "step": 3196 }, { "epoch": 0.9982046678635548, "grad_norm": 0.5869390368461609, "learning_rate": 5e-05, "loss": 0.21, "step": 3197 }, { "epoch": 0.9985168995394583, "grad_norm": 0.5852372646331787, "learning_rate": 5e-05, "loss": 0.2185, "step": 3198 }, { "epoch": 0.9988291312153618, "grad_norm": 0.5835855603218079, "learning_rate": 5e-05, "loss": 0.2058, "step": 3199 }, { "epoch": 0.9991413628912653, "grad_norm": 0.6790762543678284, "learning_rate": 5e-05, "loss": 0.21, "step": 3200 }, { "epoch": 0.9994535945671689, "grad_norm": 0.6058212518692017, "learning_rate": 5e-05, "loss": 0.2064, "step": 3201 }, { "epoch": 0.9997658262430723, "grad_norm": 0.5708680748939514, "learning_rate": 5e-05, "loss": 0.2012, "step": 3202 } ], "logging_steps": 1, "max_steps": 3202, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 4.188701077756969e+17, "train_batch_size": 2, "trial_name": null, "trial_params": null }